diff --git a/.dockerignore b/.dockerignore index eff3dca4f..a25c7ee96 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,7 +4,7 @@ scripts/ tests/ examples/ -local-server/ +local_server/ *.md *.pyc .dockerignore diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..c072578a3 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,40 @@ +## Pull Request (PR) Checklist +If you'd like to contribute, please follow the checklist below when submitting a PR. This will help us review and merge your changes faster! Thank you for contributing! + +1. **Type of PR**: Indicate the type of PR by adding a label in square brackets at the beginning of the title, such as `[Bugfix]`, `[Feature]`, `[Enhancement]`, `[Refactor]`, or `[Documentation]`. + +2. **Short Description**: Provide a brief, informative description of the PR that explains the changes made. + +3. **Issue(s) Linked**: Mention any related issue(s) by using the keyword `Fixes` or `Closes` followed by the respective issue number(s) (e.g., Fixes #123, Closes #456). + +4. **Branch**: Ensure that you have created a new branch for the changes, and it is based on the latest version of the `main` branch. + +5. **Code Changes**: Make sure the code changes are minimal, focused, and relevant to the issue or feature being addressed. + +6. **Commit Messages**: Write clear and concise commit messages that explain the purpose of each commit. + +7. **Tests**: Include unit tests and/or integration tests for any new code or changes to existing code. Make sure all tests pass before submitting the PR. + +8. **Documentation**: Update relevant documentation (e.g., README, inline comments, or external documentation) to reflect any changes made. + +9. **Review Requested**: Request a review from at least one other contributor or maintainer of the repository. + +10. **Video Submission** (For Complex/Large PRs): If your PR introduces significant changes, complexities, or a large number of lines of code, submit a brief video walkthrough along with the PR. The video should explain the purpose of the changes, the logic behind them, and how they address the issue or add the proposed feature. This will help reviewers to better understand your contribution and expedite the review process. + +## Pull Request Naming Convention + +Use the following naming convention for your PR branches: + +``` +<type>/<description>-<issue-number> +``` + +- `<type>`: The type of PR, such as `bugfix`, `feature`, `enhancement`, `refactor`, or `docs`. Multiple types are ok and should appear as `<type1>, <type2>`. +- `<description>`: A brief description of the changes made, using hyphens to separate words. +- `<issue-number>`: The issue number associated with the changes made (if applicable). + +Example: + +``` +feature/advanced-chunking-strategy-123 +``` \ No newline at end of file diff --git a/README.md b/README.md index e03c06016..edd6f516a 100644 --- a/README.md +++ b/README.md @@ -8,17 +8,18 @@ Find an example video of a Retrieval Plugin that has access to the UN Annual Rep The ChatGPT Retrieval Plugin repository provides a flexible solution for semantic search and retrieval of personal or organizational documents using natural language queries.
The repository is organized into several directories: -| Directory | Description | -| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------- | -| [`datastore`](/datastore) | Contains the core logic for storing and querying document embeddings using various vector database providers. | -| [`docs`](/docs) | Includes documentation for setting up and using each vector database provider, webhooks, and removing unused dependencies. | -| [`examples`](/examples) | Provides example configurations, authentication methods, and provider-specific examples. | -| [`models`](/models) | Contains the data models used by the plugin, such as document and metadata models. | -| [`scripts`](/scripts) | Offers scripts for processing and uploading documents from different data sources. | -| [`server`](/server) | Houses the main FastAPI server implementation. | -| [`services`](/services) | Contains utility services for tasks like chunking, metadata extraction, and PII detection. | -| [`tests`](/tests) | Includes integration tests for various vector database providers. | -| [`.well-known`](/.well-known) | Stores the plugin manifest file and OpenAPI schema, which define the plugin configuration and API specification. | +| Directory | Description | +| ------------------------------- | -------------------------------------------------------------------------------------------------------------------------- | +| [`datastore`](/datastore) | Contains the core logic for storing and querying document embeddings using various vector database providers. | +| [`docs`](/docs) | Includes documentation for setting up and using each vector database provider, webhooks, and removing unused dependencies. | +| [`examples`](/examples) | Provides example configurations, authentication methods, and provider-specific examples. | +| [`local_server`](/local_server) | Contains an implementation of the retrieval plugin configured for localhost testing. | +| [`models`](/models) | Contains the data models used by the plugin, such as document and metadata models. | +| [`scripts`](/scripts) | Offers scripts for processing and uploading documents from different data sources. | +| [`server`](/server) | Houses the main FastAPI server implementation. | +| [`services`](/services) | Contains utility services for tasks like chunking, metadata extraction, and PII detection. | +| [`tests`](/tests) | Includes integration tests for various vector database providers. | +| [`.well-known`](/.well-known) | Stores the plugin manifest file and OpenAPI schema, which define the plugin configuration and API specification. | This README provides detailed information on how to set up, develop, and deploy the ChatGPT Retrieval Plugin. 
@@ -44,6 +45,9 @@ This README provides detailed information on how to set up, develop, and deploy - [Llama Index](#llamaindex) - [Chroma](#chroma) - [Azure Cognitive Search](#azure-cognitive-search) + - [Supabase](#supabase) + - [Postgres](#postgres) + - [AnalyticDB](#analyticdb) - [Running the API Locally](#running-the-api-locally) - [Testing a Localhost Plugin in ChatGPT](#testing-a-localhost-plugin-in-chatgpt) - [Personalization](#personalization) @@ -116,6 +120,15 @@ Follow these steps to quickly set up and run the ChatGPT Retrieval Plugin: export QDRANT_API_KEY= export QDRANT_COLLECTION= + # AnalyticDB + export PG_HOST= + export PG_PORT= + export PG_USER= + export PG_PASSWORD= + export PG_DATABASE= + export PG_COLLECTION= + + # Redis export REDIS_HOST= export REDIS_PORT= @@ -142,6 +155,17 @@ Follow these steps to quickly set up and run the ChatGPT Retrieval Plugin: export AZURESEARCH_SERVICE= export AZURESEARCH_INDEX= export AZURESEARCH_API_KEY= (optional, uses key-free managed identity if not set) + + # Supabase + export SUPABASE_URL= + export SUPABASE_ANON_KEY= + + # Postgres + export PG_HOST= + export PG_PORT= + export PG_USER= + export PG_PASSWORD= + export PG_DATABASE= ``` 10. Run the API locally: `poetry run start` @@ -178,7 +202,7 @@ This is a plugin for ChatGPT that enables semantic search and retrieval of perso The plugin uses OpenAI's `text-embedding-ada-002` embeddings model to generate embeddings of document chunks, and then stores and queries them using a vector database on the backend. As an open-source and self-hosted solution, developers can deploy their own Retrieval Plugin and register it with ChatGPT. The Retrieval Plugin supports several vector database providers, allowing developers to choose their preferred one from a list. -A FastAPI server exposes the plugin's endpoints for upserting, querying, and deleting documents. Users can refine their search results by using metadata filters by source, date, author, or other criteria. The plugin can be hosted on any cloud platform that supports Docker containers, such as Fly.io, Heroku or Azure Container Apps. To keep the vector database updated with the latest documents, the plugin can process and store documents from various data sources continuously, using incoming webhooks to the upsert and delete endpoints. Tools like [Zapier](https://zapier.com) or [Make](https://www.make.com) can help configure the webhooks based on events or schedules. +A FastAPI server exposes the plugin's endpoints for upserting, querying, and deleting documents. Users can refine their search results by using metadata filters by source, date, author, or other criteria. The plugin can be hosted on any cloud platform that supports Docker containers, such as Fly.io, Heroku, Render, or Azure Container Apps. To keep the vector database updated with the latest documents, the plugin can process and store documents from various data sources continuously, using incoming webhooks to the upsert and delete endpoints. Tools like [Zapier](https://zapier.com) or [Make](https://www.make.com) can help configure the webhooks based on events or schedules. 
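For example, a Zapier or Make webhook can keep the datastore fresh by POSTing each new document to the `/upsert` endpoint. Here is a minimal sketch in Python — the deployment URL and document values are placeholders, and the payload shape follows the plugin's upsert API:

```python
import os

import requests

# Placeholder address; substitute your deployed plugin's URL.
PLUGIN_URL = "https://your-app-url.com"
BEARER_TOKEN = os.environ["BEARER_TOKEN"]

# Push one document into the datastore; a webhook service would send
# an equivalent request whenever a source document is created or updated.
response = requests.post(
    f"{PLUGIN_URL}/upsert",
    headers={"Authorization": f"Bearer {BEARER_TOKEN}"},
    json={
        "documents": [
            {
                "id": "annual-report-2022",
                "text": "Revenue grew 12% year over year, driven by ...",
                "metadata": {"source": "file", "author": "Finance Team"},
            }
        ]
    },
)
response.raise_for_status()
print(response.json())  # contains the ids of the upserted documents
```

The `/delete` endpoint can be wired up the same way for removal events.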
### Memory Feature @@ -253,20 +277,20 @@ poetry install The API requires the following environment variables to work: -| Name | Required | Description | -| ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `DATASTORE` | Yes | This specifies the vector database provider you want to use to store and query embeddings. You can choose from `chroma`, `pinecone`, `weaviate`, `zilliz`, `milvus`, `qdrant`, `redis`, `azuresearch`. | -| `BEARER_TOKEN` | Yes | This is a secret token that you need to authenticate your requests to the API. You can generate one using any tool or method you prefer, such as [jwt.io](https://jwt.io/). | -| `OPENAI_API_KEY` | Yes | This is your OpenAI API key that you need to generate embeddings using the `text-embedding-ada-002` model. You can get an API key by creating an account on [OpenAI](https://openai.com/). | +| Name | Required | Description | +| ---------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `DATASTORE` | Yes | This specifies the vector database provider you want to use to store and query embeddings. You can choose from `chroma`, `pinecone`, `weaviate`, `zilliz`, `milvus`, `qdrant`, `redis`, `azuresearch`, `supabase`, `postgres`, `analyticdb`. | +| `BEARER_TOKEN` | Yes | This is a secret token that you need to authenticate your requests to the API. You can generate one using any tool or method you prefer, such as [jwt.io](https://jwt.io/). | +| `OPENAI_API_KEY` | Yes | This is your OpenAI API key that you need to generate embeddings using the `text-embedding-ada-002` model. You can get an API key by creating an account on [OpenAI](https://openai.com/). | ### Using the plugin with Azure OpenAI The Azure Open AI uses URLs that are specific to your resource and references models not by model name but by the deployment id. As a result, you need to set additional environment variables for this case. -In addition to the OPENAI_API_BASE (your specific URL) and OPENAI_API_TYPE (azure), you should also set OPENAI_EMBEDDINGMODEL_DEPLOYMENTID which specifies the model to use for getting embeddings on upsert and query. For this, we recommend deploying text-embedding-ada-002 model and using the deployment name here. +In addition to the `OPENAI_API_BASE` (your specific URL) and `OPENAI_API_TYPE` (azure), you should also set `OPENAI_EMBEDDINGMODEL_DEPLOYMENTID`, which specifies the model to use for getting embeddings on upsert and query. For this, we recommend deploying the `text-embedding-ada-002` model and using the deployment name here. -If you wish to use the data preparation scripts, you will also need to set OPENAI_METADATA_EXTRACTIONMODEL_DEPLOYMENTID, used for metadata extraction and -OPENAI_COMPLETIONMODEL_DEPLOYMENTID, used for PII handling. +If you wish to use the data preparation scripts, you will also need to set `OPENAI_METADATA_EXTRACTIONMODEL_DEPLOYMENTID`, used for metadata extraction, and +`OPENAI_COMPLETIONMODEL_DEPLOYMENTID`, used for PII handling.
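To make the deployment-id indirection concrete, here is a rough sketch of an embedding call against Azure OpenAI using the 0.x-style `openai` Python package; the `api_version` value is an assumption — use whatever your Azure resource requires:

```python
import os

import openai

# Azure OpenAI routes requests by deployment id rather than by model name.
openai.api_type = os.environ["OPENAI_API_TYPE"]  # "azure"
openai.api_base = os.environ["OPENAI_API_BASE"]  # your resource-specific URL
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_version = "2022-12-01"  # assumed version; match your Azure resource

# The deployment id is passed as `engine` in place of a model name.
deployment = os.environ["OPENAI_EMBEDDINGMODEL_DEPLOYMENTID"]
result = openai.Embedding.create(input=["hello world"], engine=deployment)
embedding = result["data"][0]["embedding"]  # 1536 dimensions for ada-002
```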
### Choosing a Vector Database @@ -316,6 +340,18 @@ For detailed setup instructions, refer to [`/docs/providers/llama/setup.md`](/do [Azure Cognitive Search](https://azure.microsoft.com/products/search/) is a complete retrieval cloud service that supports vector search, text search, and hybrid (vectors + text combined to yield the best of the two approaches). It also offers an [optional L2 re-ranking step](https://learn.microsoft.com/azure/search/semantic-search-overview) to further improve results quality. For detailed setup instructions, refer to [`/docs/providers/azuresearch/setup.md`](/docs/providers/azuresearch/setup.md) +#### Supabase + +[Supabase](https://supabase.com/blog/openai-embeddings-postgres-vector) offers an easy and efficient way to store vectors via the [pgvector](https://github.com/pgvector/pgvector) extension for Postgres. You can use the [Supabase CLI](https://github.com/supabase/cli) to set up a whole Supabase stack locally or in the cloud, or you can use docker-compose, k8s, and other available options. For a hosted/managed solution, try [Supabase.com](https://supabase.com/) and unlock the full power of Postgres with built-in authentication, storage, auto APIs, and Realtime features. For detailed setup instructions, refer to [`/docs/providers/supabase/setup.md`](/docs/providers/supabase/setup.md). + +#### Postgres + +[Postgres](https://www.postgresql.org) offers an easy and efficient way to store vectors via the [pgvector](https://github.com/pgvector/pgvector) extension. To use pgvector, you will need to set up a PostgreSQL database with the pgvector extension enabled. For example, you can [use Docker](https://www.docker.com/blog/how-to-use-the-postgres-docker-official-image/) to run one locally. For a hosted/managed solution, you can use any of the cloud vendors that support [pgvector](https://github.com/pgvector/pgvector#hosted-postgres). For detailed setup instructions, refer to [`/docs/providers/postgres/setup.md`](/docs/providers/postgres/setup.md). + +#### AnalyticDB + +[AnalyticDB](https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/latest/product-introduction-overview) is a distributed cloud-native vector database designed for storing documents and vector embeddings. It is fully compatible with PostgreSQL syntax and managed by Alibaba Cloud. AnalyticDB offers a powerful vector compute engine, processing billions of data vectors and providing features such as indexing algorithms, structured and unstructured data capabilities, real-time updates, distance metrics, scalar filtering, and time travel searches. For detailed setup instructions, refer to [`/docs/providers/analyticdb/setup.md`](/docs/providers/analyticdb/setup.md). + ### Running the API locally To run the API locally, you first need to set the requisite environment variables with the `export` command: @@ -339,7 +375,7 @@ Append `docs` to the URL shown in the terminal and open it in a browser to acces ### Testing a Localhost Plugin in ChatGPT -To test a localhost plugin in ChatGPT, use the provided [`local-server/main.py`](/local-server/main.py) file, which is specifically configured for localhost testing with CORS settings, no authentication and routes for the manifest, OpenAPI schema and logo. +To test a localhost plugin in ChatGPT, use the provided [`local_server/main.py`](/local_server/main.py) file, which is specifically configured for localhost testing with CORS settings, no authentication, and routes for the manifest, OpenAPI schema, and logo.
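Once the local server is running (see the steps below), you can exercise the query endpoint directly. A rough sketch follows — the port is an assumption, so use the address printed in your terminal; `local_server/main.py` disables authentication, so no bearer token is needed:

```python
import requests

# Address is an assumption; use the URL your terminal shows on startup.
LOCAL_URL = "http://localhost:8000"

# Ask the plugin for the three most relevant chunks for a query.
response = requests.post(
    f"{LOCAL_URL}/query",
    json={"queries": [{"query": "What were the revenue highlights?", "top_k": 3}]},
)
response.raise_for_status()
for query_result in response.json()["results"]:
    for chunk in query_result["results"]:
        print(round(chunk["score"], 3), chunk["text"][:80])
```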
Follow these steps to test your localhost plugin: @@ -385,23 +421,20 @@ Consider the benefits and drawbacks of each authentication method before choosin You can deploy your app to different cloud providers, depending on your preferences and requirements. However, regardless of the provider you choose, you will need to update two files in your app: [openapi.yaml](/.well-known/openapi.yaml) and [ai-plugin.json](/.well-known/ai-plugin.json). As outlined above, these files define the API specification and the AI plugin configuration for your app, respectively. You need to change the url field in both files to match the address of your deployed app. -Before deploying your app, you might want to remove unused dependencies from your [pyproject.toml](/pyproject.toml) file to reduce the size of your app and improve its performance. Depending on the vector database provider you choose, you can remove the packages that are not needed for your specific provider. Refer to the respective documentation in the [`/docs/deployment/removing-unused-dependencies.md`](/docs/deployment/removing-unused-dependencies.md) file for information on removing unused dependencies for each provider. +Render has a 1-click deploy option that automatically updates the url field in both files: -Once you have deployed your app, consider uploading an initial batch of documents using one of [these scripts](/scripts) or by calling the `/upsert` endpoint. +[Deploy to Render](https://render.com/deploy?repo=https://github.com/render-examples/chatgpt-retrieval-plugin/tree/main) + +Before deploying your app, you might want to remove unused dependencies from your [pyproject.toml](/pyproject.toml) file to reduce the size of your app and improve its performance. Depending on the vector database provider you choose, you can remove the packages that are not needed for your specific provider. Refer to the respective documentation in the [`/docs/deployment/removing-unused-dependencies.md`](/docs/deployment/removing-unused-dependencies.md) file for information on removing unused dependencies for each provider. -- **Chroma:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, and `redis`. -- **Pinecone:** Remove `chromadb`, `weaviate-client`, `pymilvus`, `qdrant-client`, and `redis`. -- **Weaviate:** Remove `chromadb`, `pinecone-client`, `pymilvus`, `qdrant-client`, and `redis`. -- **Zilliz:** Remove `chromadb`, `pinecone-client`, `weaviate-client`, `qdrant-client`, and `redis`. -- **Milvus:** Remove `chromadb`, `pinecone-client`, `weaviate-client`, `qdrant-client`, and `redis`. -- **Qdrant:** Remove `chromadb`, `pinecone-client`, `weaviate-client`, `pymilvus`, and `redis`. -- **Redis:** Remove `chromadb`, `pinecone-client`, `weaviate-client`, `pymilvus`, and `qdrant-client`. +Instructions: - [Deploying to Fly.io](/docs/deployment/flyio.md) - [Deploying to Heroku](/docs/deployment/heroku.md) +- [Deploying to Render](/docs/deployment/render.md) - [Other Deployment Options](/docs/deployment/other-options.md) (Azure Container Apps, Google Cloud Run, AWS Elastic Container Service, etc.) -After you create your app, make sure to change the plugin url in your plugin manifest file [here](/.well-known/ai-plugin.json), and in your OpenAPI schema [here](/.well-known/openapi.yaml), and redeploy. +Once you have deployed your app, consider uploading an initial batch of documents using one of [these scripts](/scripts) or by calling the `/upsert` endpoint. 
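If your host does not automate the URL rewrite the way Render's 1-click deploy does, a small helper along these lines can patch both `.well-known` files before you redeploy — a sketch only, with placeholder addresses:

```python
from pathlib import Path

# Placeholder URLs; substitute the address of your deployed app.
OLD_URL = "https://your-app-url.com"
NEW_URL = "https://my-retrieval-plugin.fly.dev"

# The plugin manifest and the OpenAPI schema both embed the app URL,
# so rewrite it in each file and then redeploy.
for name in (".well-known/ai-plugin.json", ".well-known/openapi.yaml"):
    path = Path(name)
    path.write_text(path.read_text().replace(OLD_URL, NEW_URL))
```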
## Installing a Developer Plugin @@ -455,6 +488,47 @@ The scripts are: - [`process_jsonl`](scripts/process_jsonl/): This script processes a file dump of documents in a JSONL format and stores them in the vector database with some metadata. The format of the JSONL file should be a newline-delimited JSON file, where each line is a valid JSON object representing a document. The JSON object should have a `text` field and optionally other fields to populate the metadata. You can provide custom metadata as a JSON string and flags to screen for PII and extract metadata. - [`process_zip`](scripts/process_zip/): This script processes a file dump of documents in a zip file and stores them in the vector database with some metadata. The format of the zip file should be a flat zip file folder of docx, pdf, txt, md, pptx or csv files. You can provide custom metadata as a JSON string and flags to screen for PII and extract metadata. +## Pull Request (PR) Checklist +If you'd like to contribute, please follow the checklist below when submitting a PR. This will help us review and merge your changes faster! Thank you for contributing! + +1. **Type of PR**: Indicate the type of PR by adding a label in square brackets at the beginning of the title, such as `[Bugfix]`, `[Feature]`, `[Enhancement]`, `[Refactor]`, or `[Documentation]`. + +2. **Short Description**: Provide a brief, informative description of the PR that explains the changes made. + +3. **Issue(s) Linked**: Mention any related issue(s) by using the keyword `Fixes` or `Closes` followed by the respective issue number(s) (e.g., Fixes #123, Closes #456). + +4. **Branch**: Ensure that you have created a new branch for the changes, and it is based on the latest version of the `main` branch. + +5. **Code Changes**: Make sure the code changes are minimal, focused, and relevant to the issue or feature being addressed. + +6. **Commit Messages**: Write clear and concise commit messages that explain the purpose of each commit. + +7. **Tests**: Include unit tests and/or integration tests for any new code or changes to existing code. Make sure all tests pass before submitting the PR. + +8. **Documentation**: Update relevant documentation (e.g., README, inline comments, or external documentation) to reflect any changes made. + +9. **Review Requested**: Request a review from at least one other contributor or maintainer of the repository. + +10. **Video Submission** (For Complex/Large PRs): If your PR introduces significant changes, complexities, or a large number of lines of code, submit a brief video walkthrough along with the PR. The video should explain the purpose of the changes, the logic behind them, and how they address the issue or add the proposed feature. This will help reviewers to better understand your contribution and expedite the review process. + +## Pull Request Naming Convention + +Use the following naming convention for your PR branches: + +``` +<type>/<description>-<issue-number> +``` + +- `<type>`: The type of PR, such as `bugfix`, `feature`, `enhancement`, `refactor`, or `docs`. Multiple types are ok and should appear as `<type1>, <type2>`. +- `<description>`: A brief description of the changes made, using hyphens to separate words. +- `<issue-number>`: The issue number associated with the changes made (if applicable).
+ +Example: + +``` +feature/advanced-chunking-strategy-123 +``` + ## Limitations While the ChatGPT Retrieval Plugin is designed to provide a flexible solution for semantic search and retrieval, it does have some limitations: @@ -506,3 +580,8 @@ We would like to extend our gratitude to the following contributors for their co - [LlamaIndex](https://github.com/jerryjliu/llama_index) - [jerryjliu](https://github.com/jerryjliu) - [Disiok](https://github.com/Disiok) +- [Supabase](https://supabase.com/) + - [egor-romanov](https://github.com/egor-romanov) +- [Postgres](https://www.postgresql.org/) + - [egor-romanov](https://github.com/egor-romanov) + - [mmmaia](https://github.com/mmmaia) diff --git a/datastore/factory.py b/datastore/factory.py index 026798899..adde49d76 100644 --- a/datastore/factory.py +++ b/datastore/factory.py @@ -13,6 +13,7 @@ async def get_datastore() -> DataStore: return ChromaDataStore() case "llama": from datastore.providers.llama_datastore import LlamaDataStore + return LlamaDataStore() case "pinecone": @@ -43,6 +44,18 @@ async def get_datastore() -> DataStore: from datastore.providers.azuresearch_datastore import AzureSearchDataStore return AzureSearchDataStore() + case "supabase": + from datastore.providers.supabase_datastore import SupabaseDataStore + + return SupabaseDataStore() + case "postgres": + from datastore.providers.postgres_datastore import PostgresDataStore + + return PostgresDataStore() + case "analyticdb": + from datastore.providers.analyticdb_datastore import AnalyticDBDataStore + + return AnalyticDBDataStore() case _: raise ValueError( f"Unsupported vector database: {datastore}. " diff --git a/datastore/providers/analyticdb_datastore.py b/datastore/providers/analyticdb_datastore.py new file mode 100644 index 000000000..ba206f2e1 --- /dev/null +++ b/datastore/providers/analyticdb_datastore.py @@ -0,0 +1,312 @@ +import os +import asyncio +from typing import Dict, List, Optional, Tuple, Any +from datetime import datetime +from loguru import logger + +from psycopg2cffi import compat + +compat.register() +import psycopg2 +from psycopg2.extras import DictCursor +from psycopg2.pool import SimpleConnectionPool + +from services.date import to_unix_timestamp +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, + DocumentChunkWithScore, +) + +PG_CONFIG = { + "collection": os.environ.get("PG_COLLECTION", "document_chunks"), + "database": os.environ.get("PG_DATABASE", "postgres"), + "user": os.environ.get("PG_USER", "user"), + "password": os.environ.get("PG_PASSWORD", "password"), + "host": os.environ.get("PG_HOST", "localhost"), + "port": int(os.environ.get("PG_PORT", "5432")), +} +OUTPUT_DIM = 1536 + + +class AnalyticDBDataStore(DataStore): + def __init__(self, config: Dict[str, str] = PG_CONFIG): + self.collection_name = config["collection"] + self.user = config["user"] + self.password = config["password"] + self.database = config["database"] + self.host = config["host"] + self.port = config["port"] + + self.connection_pool = SimpleConnectionPool( + minconn=1, + maxconn=100, + dbname=self.database, + user=self.user, + password=self.password, + host=self.host, + port=self.port, + ) + + self._initialize_db() + + def _initialize_db(self): + conn = self.connection_pool.getconn() + try: + with conn.cursor() as cur: + self._create_table(cur) + self._create_embedding_index(cur) + conn.commit() + finally: + self.connection_pool.putconn(conn) + + def 
_create_table(self, cur: psycopg2.extensions.cursor): + cur.execute( + f""" + CREATE TABLE IF NOT EXISTS {self.collection_name} ( + id TEXT PRIMARY KEY DEFAULT uuid_generate_v4()::TEXT, + source TEXT, + source_id TEXT, + content TEXT, + document_id TEXT, + author TEXT, + url TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + embedding real[] + ); + """ + ) + + def _create_embedding_index(self, cur: psycopg2.extensions.cursor): + cur.execute( + f""" + SELECT * FROM pg_indexes WHERE tablename='{self.collection_name}'; + """ + ) + index_exists = any( + index[2] == f"{self.collection_name}_embedding_idx" + for index in cur.fetchall() + ) + if not index_exists: + cur.execute( + f""" + CREATE INDEX {self.collection_name}_embedding_idx + ON {self.collection_name} + USING ann(embedding) + WITH ( + distancemeasure=L2, + dim={OUTPUT_DIM}, + pq_segments=64, + hnsw_m=100, + pq_centers=2048 + ); + """ + ) + + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """ + Takes in a dict of document_ids to list of document chunks and inserts them into the database. + Returns a list of document ids. + """ + loop = asyncio.get_event_loop() + tasks = [ + loop.run_in_executor(None, self._upsert_chunk, chunk) + for document_chunks in chunks.values() + for chunk in document_chunks + ] + await asyncio.gather(*tasks) + + return list(chunks.keys()) + + def _upsert_chunk(self, chunk: DocumentChunk): + created_at = ( + datetime.fromtimestamp(to_unix_timestamp(chunk.metadata.created_at)) + if chunk.metadata.created_at + else None + ) + data = ( + chunk.id, + chunk.text, + chunk.embedding, + chunk.metadata.document_id, + chunk.metadata.source, + chunk.metadata.source_id, + chunk.metadata.url, + chunk.metadata.author, + created_at, + ) + + conn = self.connection_pool.getconn() + try: + with conn.cursor() as cur: + # Construct the SQL query and data + query = f""" + INSERT INTO {self.collection_name} (id, content, embedding, document_id, source, source_id, url, author, created_at) + VALUES (%s::text, %s::text, %s::real[], %s::text, %s::text, %s::text, %s::text, %s::text, %s::timestamp with time zone) + ON CONFLICT (id) DO UPDATE SET + content = EXCLUDED.content, + embedding = EXCLUDED.embedding, + document_id = EXCLUDED.document_id, + source = EXCLUDED.source, + source_id = EXCLUDED.source_id, + url = EXCLUDED.url, + author = EXCLUDED.author, + created_at = EXCLUDED.created_at; + """ + + # Execute the query + cur.execute(query, data) + + # Commit the transaction + conn.commit() + finally: + self.connection_pool.putconn(conn) + + async def _query(self, queries: List[QueryWithEmbedding]) -> List[QueryResult]: + """ + Takes in a list of queries with embeddings and filters and returns a list of query results with matching document chunks and scores.
+ """ + query_results: List[QueryResult] = [] + + def generate_query(query: QueryWithEmbedding) -> Tuple[str, List[Any]]: + embedding = "[" + ", ".join(str(x) for x in query.embedding) + "]" + q = f""" + SELECT + id, + content, + source, + source_id, + document_id, + url, + created_at, + author, + embedding, + l2_distance(embedding,array{embedding}::real[]) AS similarity + FROM + {self.collection_name} + """ + where_clause, params = generate_where_clause(query.filter) + q += where_clause + q += f"ORDER BY embedding <-> array{embedding}::real[] LIMIT {query.top_k};" + return q, params + + def generate_where_clause( + query_filter: Optional[DocumentMetadataFilter], + ) -> Tuple[str, List[Any]]: + if query_filter is None: + return "", [] + + conditions = [ + ("document_id=%s", query_filter.document_id), + ("source_id=%s", query_filter.source_id), + ("source LIKE %s", query_filter.source), + ("author LIKE %s", query_filter.author), + ("created_at >= %s", query_filter.start_date), + ("created_at <= %s", query_filter.end_date), + ] + + where_clause = "WHERE " + " AND ".join( + [cond[0] for cond in conditions if cond[1] is not None] + ) + + values = [cond[1] for cond in conditions if cond[1] is not None] + + return where_clause, values + + def fetch_data(cur, q: str, params: List[Any]): + cur.execute(q, params) + return cur.fetchall() + + def create_results(data): + results = [] + for row in data: + document_chunk = DocumentChunkWithScore( + id=row["id"], + text=row["content"], + score=float(row["similarity"]), + metadata=DocumentChunkMetadata( + source=row["source"], + source_id=row["source_id"], + document_id=row["document_id"], + url=row["url"], + created_at=str(row["created_at"]), + author=row["author"], + ), + ) + results.append(document_chunk) + return results + + conn = self.connection_pool.getconn() + try: + for query in queries: + try: + cur = conn.cursor(cursor_factory=DictCursor) + for query in queries: + q, params = generate_query(query) + data = fetch_data(cur, q, params) + results = create_results(data) + query_results.append( + QueryResult(query=query.query, results=results) + ) + except Exception as e: + logger.error(e) + query_results.append(QueryResult(query=query.query, results=[])) + return query_results + finally: + self.connection_pool.putconn(conn) + + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + async def execute_delete(query: str, params: Optional[List] = None) -> bool: + conn = self.connection_pool.getconn() + try: + with conn.cursor() as cur: + if params: + cur.execute(query, params) + else: + cur.execute(query) + self.conn.commit() + return True + except Exception as e: + logger.error(e) + return False + finally: + self.connection_pool.putconn(conn) + + if delete_all: + query = f"DELETE FROM {self.collection_name} WHERE document_id LIKE %s;" + return await execute_delete(query, ["%"]) + elif ids: + query = f"DELETE FROM {self.collection_name} WHERE document_id IN ({','.join(['%s'] * len(ids))});" + return await execute_delete(query, ids) + elif filter is not None: + query, params = self._generate_delete_query(filter) + return await execute_delete(query, params) + else: + return True + + def _generate_delete_query( + self, filter: DocumentMetadataFilter + ) -> Tuple[str, List]: + conditions = [ + (filter.document_id, "document_id = %s"), + (filter.source, "source = %s"), + (filter.source_id, "source_id = %s"), + (filter.author, "author = %s"), + 
(filter.start_date, "created_at >= %s"), + (filter.end_date, "created_at <= %s"), + ] + + where_conditions = [f for value, f in conditions if value] + where_values = [value for value, _ in conditions if value] + + query = f"DELETE FROM {self.collection_name} WHERE {' AND '.join(where_conditions)};" + return query, where_values diff --git a/datastore/providers/azuresearch_datastore.py b/datastore/providers/azuresearch_datastore.py index 4ae0182cc..3852258e3 100644 --- a/datastore/providers/azuresearch_datastore.py +++ b/datastore/providers/azuresearch_datastore.py @@ -28,7 +28,7 @@ # Allow overriding field names for Azure Search FIELDS_ID = os.environ.get("AZURESEARCH_FIELDS_ID", "id") FIELDS_TEXT = os.environ.get("AZURESEARCH_FIELDS_TEXT", "text") -FIELDS_EMBEDDING = os.environ.get("AZURESEARCH_FIELDS_TEXT", "embedding") +FIELDS_EMBEDDING = os.environ.get("AZURESEARCH_FIELDS_EMBEDDING", "embedding") FIELDS_DOCUMENT_ID = os.environ.get("AZURESEARCH_FIELDS_DOCUMENT_ID", "document_id") FIELDS_SOURCE = os.environ.get("AZURESEARCH_FIELDS_SOURCE", "source") FIELDS_SOURCE_ID = os.environ.get("AZURESEARCH_FIELDS_SOURCE_ID", "source_id") @@ -132,14 +132,16 @@ async def _single_query(self, query: QueryWithEmbedding) -> QueryResult: """ filter = self._translate_filter(query.filter) if query.filter is not None else None try: - k = query.top_k if filter is None else query.top_k * 2 + vector_top_k = query.top_k if filter is None else query.top_k * 2 q = query.query if not AZURESEARCH_DISABLE_HYBRID else None if AZURESEARCH_SEMANTIC_CONFIG != None and not AZURESEARCH_DISABLE_HYBRID: + # Ensure we're feeding a good number of candidates to the L2 reranker + vector_top_k = max(50, vector_top_k) r = await self.client.search( q, filter=filter, top=query.top_k, - vector=Vector(value=query.embedding, k=k, fields=FIELDS_EMBEDDING), + vector=Vector(value=query.embedding, k=vector_top_k, fields=FIELDS_EMBEDDING), query_type=QueryType.SEMANTIC, query_language=AZURESEARCH_LANGUAGE, semantic_configuration_name=AZURESEARCH_SEMANTIC_CONFIG) @@ -148,7 +150,7 @@ async def _single_query(self, query: QueryWithEmbedding) -> QueryResult: q, filter=filter, top=query.top_k, - vector=Vector(value=query.embedding, k=k, fields=FIELDS_EMBEDDING)) + vector=Vector(value=query.embedding, k=vector_top_k, fields=FIELDS_EMBEDDING)) results: List[DocumentChunkWithScore] = [] async for hit in r: f = lambda field: hit.get(field) if field != "-" else None diff --git a/datastore/providers/milvus_datastore.py b/datastore/providers/milvus_datastore.py index 202e86d55..d105cc4e9 100644 --- a/datastore/providers/milvus_datastore.py +++ b/datastore/providers/milvus_datastore.py @@ -2,6 +2,7 @@ import os import asyncio +from loguru import logger from typing import Dict, List, Optional from pymilvus import ( Collection, @@ -124,14 +125,6 @@ def __init__( self._create_collection(MILVUS_COLLECTION, create_new) # type: ignore self._create_index() - def _print_info(self, msg): - # TODO: logger - print(msg) - - def _print_err(self, msg): - # TODO: logger - print(msg) - def _get_schema(self): return SCHEMA_V1 if self._schema_ver == "V1" else SCHEMA_V2 @@ -143,7 +136,7 @@ def _create_connection(self): addr = connections.get_connection_addr(x[0]) if x[1] and ('address' in addr) and (addr['address'] == "{}:{}".format(MILVUS_HOST, MILVUS_PORT)): self.alias = x[0] - self._print_info("Reuse connection to Milvus server '{}:{}' with alias '{:s}'" + logger.info("Reuse connection to Milvus server '{}:{}' with alias '{:s}'" .format(MILVUS_HOST, MILVUS_PORT, 
self.alias)) break @@ -158,10 +151,10 @@ def _create_connection(self): password=MILVUS_PASSWORD, # type: ignore secure=MILVUS_USE_SECURITY, ) - self._print_info("Create connection to Milvus server '{}:{}' with alias '{:s}'" + logger.info("Create connection to Milvus server '{}:{}' with alias '{:s}'" .format(MILVUS_HOST, MILVUS_PORT, self.alias)) except Exception as e: - self._print_err("Failed to create connection to Milvus server '{}:{}', error: {}" + logger.error("Failed to create connection to Milvus server '{}:{}', error: {}" .format(MILVUS_HOST, MILVUS_PORT, e)) def _create_collection(self, collection_name, create_new: bool) -> None: @@ -189,7 +182,7 @@ def _create_collection(self, collection_name, create_new: bool) -> None: consistency_level=self._consistency_level, ) self._schema_ver = "V2" - self._print_info("Create Milvus collection '{}' with schema {} and consistency level {}" + logger.info("Create Milvus collection '{}' with schema {} and consistency level {}" .format(collection_name, self._schema_ver, self._consistency_level)) else: # If the collection exists, point to it @@ -201,10 +194,10 @@ def _create_collection(self, collection_name, create_new: bool) -> None: if field.name == "id" and field.is_primary: self._schema_ver = "V2" break - self._print_info("Milvus collection '{}' already exists with schema {}" + logger.info("Milvus collection '{}' already exists with schema {}" .format(collection_name, self._schema_ver)) except Exception as e: - self._print_err("Failed to create collection '{}', error: {}".format(collection_name, e)) + logger.error("Failed to create collection '{}', error: {}".format(collection_name, e)) def _create_index(self): # TODO: verify index/search params passed by os.environ @@ -216,7 +209,7 @@ def _create_index(self): if self.index_params is not None: # Convert the string format to JSON format parameters passed by MILVUS_INDEX_PARAMS self.index_params = json.loads(self.index_params) - self._print_info("Create Milvus index: {}".format(self.index_params)) + logger.info("Create Milvus index: {}".format(self.index_params)) # Create an index on the 'embedding' field with the index params found in init self.col.create_index(EMBEDDING_FIELD, index_params=self.index_params) else: @@ -227,24 +220,24 @@ def _create_index(self): "index_type": "HNSW", "params": {"M": 8, "efConstruction": 64}, } - self._print_info("Attempting creation of Milvus '{}' index".format(i_p["index_type"])) + logger.info("Attempting creation of Milvus '{}' index".format(i_p["index_type"])) self.col.create_index(EMBEDDING_FIELD, index_params=i_p) self.index_params = i_p - self._print_info("Creation of Milvus '{}' index successful".format(i_p["index_type"])) + logger.info("Creation of Milvus '{}' index successful".format(i_p["index_type"])) # If create fails, most likely due to being Zilliz Cloud instance, try to create an AutoIndex except MilvusException: - self._print_info("Attempting creation of Milvus default index") + logger.info("Attempting creation of Milvus default index") i_p = {"metric_type": "IP", "index_type": "AUTOINDEX", "params": {}} self.col.create_index(EMBEDDING_FIELD, index_params=i_p) self.index_params = i_p - self._print_info("Creation of Milvus default index successful") + logger.info("Creation of Milvus default index successful") # If an index already exists, grab its params else: # How about if the first index is not vector index? 
for index in self.col.indexes: idx = index.to_dict() if idx["field"] == EMBEDDING_FIELD: - self._print_info("Index already exists: {}".format(idx)) + logger.info("Index already exists: {}".format(idx)) self.index_params = idx['index_param'] break @@ -272,9 +265,9 @@ def _create_index(self): } # Set the search params self.search_params = default_search_params[self.index_params["index_type"]] - self._print_info("Milvus search parameters: {}".format(self.search_params)) + logger.info("Milvus search parameters: {}".format(self.search_params)) except Exception as e: - self._print_err("Failed to create index, error: {}".format(e)) + logger.error("Failed to create index, error: {}".format(e)) async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: """Upsert chunks into the datastore. @@ -319,18 +312,18 @@ async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: for batch in batches: if len(batch[0]) != 0: try: - self._print_info(f"Upserting batch of size {len(batch[0])}") + logger.info(f"Upserting batch of size {len(batch[0])}") self.col.insert(batch) - self._print_info(f"Upserted batch successfully") + logger.info(f"Upserted batch successfully") except Exception as e: - self._print_err(f"Failed to insert batch records, error: {e}") + logger.error(f"Failed to insert batch records, error: {e}") raise e # This setting perfoms flushes after insert. Small insert == bad to use # self.col.flush() return doc_ids except Exception as e: - self._print_err("Failed to insert records, error: {}".format(e)) + logger.error("Failed to insert records, error: {}".format(e)) return [] @@ -365,7 +358,7 @@ def _get_values(self, chunk: DocumentChunk) -> List[any] | None: # type: ignore x = values.get(key) or default # If one of our required fields is missing, ignore the entire entry if x is Required: - self._print_info("Chunk " + values["id"] + " missing " + key + " skipping") + logger.info("Chunk " + values["id"] + " missing " + key + " skipping") return None # Add the corresponding value if it passes the tests ret.append(x) @@ -436,7 +429,7 @@ async def _single_query(query: QueryWithEmbedding) -> QueryResult: return QueryResult(query=query.query, results=results) except Exception as e: - self._print_err("Failed to query, error: {}".format(e)) + logger.error("Failed to query, error: {}".format(e)) return QueryResult(query=query.query, results=[]) results: List[QueryResult] = await asyncio.gather( @@ -460,7 +453,7 @@ async def delete( # If deleting all, drop and create the new collection if delete_all: coll_name = self.col.name - self._print_info("Delete the entire collection {} and create new one".format(coll_name)) + logger.info("Delete the entire collection {} and create new one".format(coll_name)) # Release the collection from memory self.col.release() # Drop the collection @@ -490,7 +483,7 @@ async def delete( pks = ['"' + pk + '"' for pk in pks] # Delete by ids batch by batch(avoid too long expression) - self._print_info("Apply {:d} deletions to schema {:s}".format(len(pks), self._schema_ver)) + logger.info("Apply {:d} deletions to schema {:s}".format(len(pks), self._schema_ver)) while len(pks) > 0: batch_pks = pks[:batch_size] pks = pks[batch_size:] @@ -499,7 +492,7 @@ async def delete( # Increment our deleted count delete_count += int(res.delete_count) # type: ignore except Exception as e: - self._print_err("Failed to delete by ids, error: {}".format(e)) + logger.error("Failed to delete by ids, error: {}".format(e)) try: # Check if empty filter @@ -524,9 +517,9 @@ 
async def delete( # Increment our delete count delete_count += int(res.delete_count) # type: ignore except Exception as e: - self._print_err("Failed to delete by filter, error: {}".format(e)) + logger.error("Failed to delete by filter, error: {}".format(e)) - self._print_info("{:d} records deleted".format(delete_count)) + logger.info("{:d} records deleted".format(delete_count)) # This setting performs flushes after delete. Small delete == bad to use # self.col.flush() diff --git a/datastore/providers/pgvector_datastore.py b/datastore/providers/pgvector_datastore.py new file mode 100644 index 000000000..cd7026b23 --- /dev/null +++ b/datastore/providers/pgvector_datastore.py @@ -0,0 +1,181 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional +from datetime import datetime +from loguru import logger + +from services.date import to_unix_timestamp +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, + DocumentChunkWithScore, +) + + +# interface that Postgres clients implement for pg-based DataStore providers +class PGClient(ABC): + @abstractmethod + async def upsert(self, table: str, json: dict[str, Any]) -> None: + """ + Upserts a document chunk row, given as a dict of column values, into the table. + """ + raise NotImplementedError + + @abstractmethod + async def rpc(self, function_name: str, params: dict[str, Any]) -> Any: + """ + Calls a stored procedure in the database with the given parameters. + """ + raise NotImplementedError + + @abstractmethod + async def delete_like(self, table: str, column: str, pattern: str) -> None: + """ + Deletes rows in the table that match the pattern. + """ + raise NotImplementedError + + @abstractmethod + async def delete_in(self, table: str, column: str, ids: List[str]) -> None: + """ + Deletes rows in the table that match the ids. + """ + raise NotImplementedError + + @abstractmethod + async def delete_by_filters( + self, table: str, filter: DocumentMetadataFilter + ) -> None: + """ + Deletes rows in the table that match the filter. + """ + raise NotImplementedError + + +# abstract base class for Postgres-based datastore providers that implement the DataStore interface
class PgVectorDataStore(DataStore): + def __init__(self): + self.client = self.create_db_client() + + @abstractmethod + def create_db_client(self) -> PGClient: + """ + Create a database client. Providers can reach Postgres through different APIs, + such as a Supabase client or a psycopg2-based client. + Returns a client for the Postgres DB. + """ + + raise NotImplementedError + + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """ + Takes in a dict of document_ids to list of document chunks and inserts them into the database. + Returns a list of document ids.
+ """ + for document_id, document_chunks in chunks.items(): + for chunk in document_chunks: + json = { + "id": chunk.id, + "content": chunk.text, + "embedding": chunk.embedding, + "document_id": document_id, + "source": chunk.metadata.source, + "source_id": chunk.metadata.source_id, + "url": chunk.metadata.url, + "author": chunk.metadata.author, + } + if chunk.metadata.created_at: + json["created_at"] = ( + datetime.fromtimestamp( + to_unix_timestamp(chunk.metadata.created_at) + ), + ) + await self.client.upsert("documents", json) + + return list(chunks.keys()) + + async def _query(self, queries: List[QueryWithEmbedding]) -> List[QueryResult]: + """ + Takes in a list of queries with embeddings and filters and returns a list of query results with matching document chunks and scores. + """ + query_results: List[QueryResult] = [] + for query in queries: + # get the top 3 documents with the highest cosine similarity using rpc function in the database called "match_page_sections" + params = { + "in_embedding": query.embedding, + } + if query.top_k: + params["in_match_count"] = query.top_k + if query.filter: + if query.filter.document_id: + params["in_document_id"] = query.filter.document_id + if query.filter.source: + params["in_source"] = query.filter.source.value + if query.filter.source_id: + params["in_source_id"] = query.filter.source_id + if query.filter.author: + params["in_author"] = query.filter.author + if query.filter.start_date: + params["in_start_date"] = datetime.fromtimestamp( + to_unix_timestamp(query.filter.start_date) + ) + if query.filter.end_date: + params["in_end_date"] = datetime.fromtimestamp( + to_unix_timestamp(query.filter.end_date) + ) + try: + data = await self.client.rpc("match_page_sections", params=params) + results: List[DocumentChunkWithScore] = [] + for row in data: + document_chunk = DocumentChunkWithScore( + id=row["id"], + text=row["content"], + # TODO: add embedding to the response ? + # embedding=row["embedding"], + score=float(row["similarity"]), + metadata=DocumentChunkMetadata( + source=row["source"], + source_id=row["source_id"], + document_id=row["document_id"], + url=row["url"], + created_at=row["created_at"], + author=row["author"], + ), + ) + results.append(document_chunk) + query_results.append(QueryResult(query=query.query, results=results)) + except Exception as e: + logger.error(e) + query_results.append(QueryResult(query=query.query, results=[])) + return query_results + + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + """ + Removes vectors by ids, filter, or everything in the datastore. + Multiple parameters can be used at once. + Returns whether the operation was successful. 
+ """ + if delete_all: + try: + await self.client.delete_like("documents", "document_id", "%") + except: + return False + elif ids: + try: + await self.client.delete_in("documents", "document_id", ids) + except: + return False + elif filter: + try: + await self.client.delete_by_filters("documents", filter) + except: + return False + return True diff --git a/datastore/providers/pinecone_datastore.py b/datastore/providers/pinecone_datastore.py index 2896cf66b..c10ee2bea 100644 --- a/datastore/providers/pinecone_datastore.py +++ b/datastore/providers/pinecone_datastore.py @@ -3,6 +3,7 @@ import pinecone from tenacity import retry, wait_random_exponential, stop_after_attempt import asyncio +from loguru import logger from datastore.datastore import DataStore from models.models import ( @@ -41,7 +42,7 @@ def __init__(self): # Create a new index with the specified name, dimension, and metadata configuration try: - print( + logger.info( f"Creating index {PINECONE_INDEX} with metadata config {fields_to_index}" ) pinecone.create_index( @@ -50,18 +51,18 @@ def __init__(self): metadata_config={"indexed": fields_to_index}, ) self.index = pinecone.Index(PINECONE_INDEX) - print(f"Index {PINECONE_INDEX} created successfully") + logger.info(f"Index {PINECONE_INDEX} created successfully") except Exception as e: - print(f"Error creating index {PINECONE_INDEX}: {e}") + logger.error(f"Error creating index {PINECONE_INDEX}: {e}") raise e elif PINECONE_INDEX and PINECONE_INDEX in pinecone.list_indexes(): # Connect to an existing index with the specified name try: - print(f"Connecting to existing index {PINECONE_INDEX}") + logger.info(f"Connecting to existing index {PINECONE_INDEX}") self.index = pinecone.Index(PINECONE_INDEX) - print(f"Connected to index {PINECONE_INDEX} successfully") + logger.info(f"Connected to index {PINECONE_INDEX} successfully") except Exception as e: - print(f"Error connecting to index {PINECONE_INDEX}: {e}") + logger.error(f"Error connecting to index {PINECONE_INDEX}: {e}") raise e @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) @@ -78,7 +79,7 @@ async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: for doc_id, chunk_list in chunks.items(): # Append the id to the ids list doc_ids.append(doc_id) - print(f"Upserting document_id: {doc_id}") + logger.info(f"Upserting document_id: {doc_id}") for chunk in chunk_list: # Create a vector tuple of (id, embedding, metadata) # Convert the metadata object to a dict with unix timestamps for dates @@ -97,11 +98,11 @@ async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: # Upsert each batch to Pinecone for batch in batches: try: - print(f"Upserting batch of size {len(batch)}") + logger.info(f"Upserting batch of size {len(batch)}") self.index.upsert(vectors=batch) - print(f"Upserted batch successfully") + logger.info(f"Upserted batch successfully") except Exception as e: - print(f"Error upserting batch: {e}") + logger.error(f"Error upserting batch: {e}") raise e return doc_ids @@ -117,7 +118,7 @@ async def _query( # Define a helper coroutine that performs a single query and returns a QueryResult async def _single_query(query: QueryWithEmbedding) -> QueryResult: - print(f"Query: {query.query}") + logger.debug(f"Query: {query.query}") # Convert the metadata filter object to a dict with pinecone filter expressions pinecone_filter = self._get_pinecone_filter(query.filter) @@ -132,7 +133,7 @@ async def _single_query(query: QueryWithEmbedding) -> QueryResult: include_metadata=True, 
) except Exception as e: - print(f"Error querying index: {e}") + logger.error(f"Error querying index: {e}") raise e query_results: List[DocumentChunkWithScore] = [] @@ -184,12 +185,12 @@ async def delete( # Delete all vectors from the index if delete_all is True if delete_all: try: - print(f"Deleting all vectors from index") + logger.info(f"Deleting all vectors from index") self.index.delete(delete_all=True) - print(f"Deleted all vectors successfully") + logger.info(f"Deleted all vectors successfully") return True except Exception as e: - print(f"Error deleting all vectors: {e}") + logger.error(f"Error deleting all vectors: {e}") raise e # Convert the metadata filter object to a dict with pinecone filter expressions @@ -197,22 +198,22 @@ async def delete( # Delete vectors that match the filter from the index if the filter is not empty if pinecone_filter != {}: try: - print(f"Deleting vectors with filter {pinecone_filter}") + logger.info(f"Deleting vectors with filter {pinecone_filter}") self.index.delete(filter=pinecone_filter) - print(f"Deleted vectors with filter successfully") + logger.info(f"Deleted vectors with filter successfully") except Exception as e: - print(f"Error deleting vectors with filter: {e}") + logger.error(f"Error deleting vectors with filter: {e}") raise e # Delete vectors that match the document ids from the index if the ids list is not empty if ids is not None and len(ids) > 0: try: - print(f"Deleting vectors with ids {ids}") + logger.info(f"Deleting vectors with ids {ids}") pinecone_filter = {"document_id": {"$in": ids}} self.index.delete(filter=pinecone_filter) # type: ignore - print(f"Deleted vectors with ids successfully") + logger.info(f"Deleted vectors with ids successfully") except Exception as e: - print(f"Error deleting vectors with ids: {e}") + logger.error(f"Error deleting vectors with ids: {e}") raise e return True diff --git a/datastore/providers/postgres_datastore.py b/datastore/providers/postgres_datastore.py new file mode 100644 index 000000000..402ad1b28 --- /dev/null +++ b/datastore/providers/postgres_datastore.py @@ -0,0 +1,132 @@ +import os +from typing import Any, List +from datetime import datetime +import numpy as np + +from psycopg2 import connect +from psycopg2.extras import DictCursor +from pgvector.psycopg2 import register_vector + +from services.date import to_unix_timestamp +from datastore.providers.pgvector_datastore import PGClient, PgVectorDataStore +from models.models import ( + DocumentMetadataFilter, +) + +# Connection settings come from the PG_* environment variables documented in the README +PG_HOST = os.environ.get("PG_HOST", "localhost") +PG_PORT = int(os.environ.get("PG_PORT", 5432)) +PG_DB = os.environ.get("PG_DATABASE", "postgres") +PG_USER = os.environ.get("PG_USER", "postgres") +PG_PASSWORD = os.environ.get("PG_PASSWORD", "postgres") + + +# class that implements the DataStore interface for Postgres Datastore provider +class PostgresDataStore(PgVectorDataStore): + def create_db_client(self): + return PostgresClient() + + +class PostgresClient(PGClient): + def __init__(self) -> None: + super().__init__() + self.client = connect( + dbname=PG_DB, user=PG_USER, password=PG_PASSWORD, host=PG_HOST, port=PG_PORT + ) + register_vector(self.client) + + def __del__(self): + # close the connection when the client is destroyed + self.client.close() + + async def upsert(self, table: str, json: dict[str, Any]): + """ + Upserts a document chunk row, given as a dict of column values, into the table.
+ """ + with self.client.cursor() as cur: + if not json.get("created_at"): + json["created_at"] = datetime.now() + json["embedding"] = np.array(json["embedding"]) + cur.execute( + f"INSERT INTO {table} (id, content, embedding, document_id, source, source_id, url, author, created_at) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO UPDATE SET content = %s, embedding = %s, document_id = %s, source = %s, source_id = %s, url = %s, author = %s, created_at = %s", + ( + json["id"], + json["content"], + json["embedding"], + json["document_id"], + json["source"], + json["source_id"], + json["url"], + json["author"], + json["created_at"], + json["content"], + json["embedding"], + json["document_id"], + json["source"], + json["source_id"], + json["url"], + json["author"], + json["created_at"], + ), + ) + self.client.commit() + + async def rpc(self, function_name: str, params: dict[str, Any]): + """ + Calls a stored procedure in the database with the given parameters. + """ + data = [] + params["in_embedding"] = np.array(params["in_embedding"]) + with self.client.cursor(cursor_factory=DictCursor) as cur: + cur.callproc(function_name, params) + rows = cur.fetchall() + self.client.commit() + for row in rows: + row["created_at"] = to_unix_timestamp(row["created_at"]) + data.append(dict(row)) + return data + + async def delete_like(self, table: str, column: str, pattern: str): + """ + Deletes rows in the table that match the pattern. + """ + with self.client.cursor() as cur: + cur.execute( + f"DELETE FROM {table} WHERE {column} LIKE %s", + (f"%{pattern}%",), + ) + self.client.commit() + + async def delete_in(self, table: str, column: str, ids: List[str]): + """ + Deletes rows in the table that match the ids. + """ + with self.client.cursor() as cur: + cur.execute( + f"DELETE FROM {table} WHERE {column} IN %s", + (tuple(ids),), + ) + self.client.commit() + + async def delete_by_filters(self, table: str, filter: DocumentMetadataFilter): + """ + Deletes rows in the table that match the filter. + """ + + filters = "WHERE" + if filter.document_id: + filters += f" document_id = '{filter.document_id}' AND" + if filter.source: + filters += f" source = '{filter.source}' AND" + if filter.source_id: + filters += f" source_id = '{filter.source_id}' AND" + if filter.author: + filters += f" author = '{filter.author}' AND" + if filter.start_date: + filters += f" created_at >= '{filter.start_date}' AND" + if filter.end_date: + filters += f" created_at <= '{filter.end_date}' AND" + filters = filters[:-4] + + with self.client.cursor() as cur: + cur.execute(f"DELETE FROM {table} {filters}") + self.client.commit() diff --git a/datastore/providers/redis_datastore.py b/datastore/providers/redis_datastore.py index 669f3fb83..da13348f7 100644 --- a/datastore/providers/redis_datastore.py +++ b/datastore/providers/redis_datastore.py @@ -1,5 +1,4 @@ import asyncio -import logging import os import re import json @@ -14,6 +13,7 @@ NumericField, VectorField, ) +from loguru import logger from typing import Dict, List, Optional from datastore.datastore import DataStore from models.models import ( @@ -62,7 +62,7 @@ async def _check_redis_module_exist(client: redis.Redis, modules: List[dict]): if module["name"] not in installed_modules or int(installed_modules[module["name"]]["ver"]) < int(module["ver"]): error_message = "You must add the RediSearch (>= 2.6) and ReJSON (>= 2.4) modules from Redis Stack. 
" \ "Please refer to Redis Stack docs: https://redis.io/docs/stack/" - logging.error(error_message) + logger.error(error_message) raise AttributeError(error_message) @@ -84,12 +84,12 @@ async def init(cls, **kwargs): """ try: # Connect to the Redis Client - logging.info("Connecting to Redis") + logger.info("Connecting to Redis") client = redis.Redis( host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD ) except Exception as e: - logging.error(f"Error setting up Redis: {e}") + logger.error(f"Error setting up Redis: {e}") raise e await _check_redis_module_exist(client, modules=REDIS_REQUIRED_MODULES) @@ -117,15 +117,15 @@ async def init(cls, **kwargs): try: # Check for existence of RediSearch Index await client.ft(REDIS_INDEX_NAME).info() - logging.info(f"RediSearch index {REDIS_INDEX_NAME} already exists") + logger.info(f"RediSearch index {REDIS_INDEX_NAME} already exists") except: # Create the RediSearch Index - logging.info(f"Creating new RediSearch index {REDIS_INDEX_NAME}") + logger.info(f"Creating new RediSearch index {REDIS_INDEX_NAME}") definition = IndexDefinition( prefix=[REDIS_DOC_PREFIX], index_type=IndexType.JSON ) fields = list(unpack_schema(redisearch_schema)) - logging.info(f"Creating index with fields: {fields}") + logger.info(f"Creating index with fields: {fields}") await client.ft(REDIS_INDEX_NAME).create_index( fields=fields, definition=definition ) @@ -299,10 +299,10 @@ async def _query( results: List[QueryResult] = [] # Gather query results in a pipeline - logging.info(f"Gathering {len(queries)} query results") + logger.info(f"Gathering {len(queries)} query results") for query in queries: - logging.info(f"Query: {query.query}") + logger.debug(f"Query: {query.query}") query_results: List[DocumentChunkWithScore] = [] # Extract Redis query @@ -348,12 +348,12 @@ async def delete( # Delete all vectors from the index if delete_all is True if delete_all: try: - logging.info(f"Deleting all documents from index") + logger.info(f"Deleting all documents from index") await self.client.ft(REDIS_INDEX_NAME).dropindex(True) - logging.info(f"Deleted all documents successfully") + logger.info(f"Deleted all documents successfully") return True except Exception as e: - logging.info(f"Error deleting all documents: {e}") + logger.error(f"Error deleting all documents: {e}") raise e # Delete by filter @@ -365,15 +365,15 @@ async def delete( f"{REDIS_DOC_PREFIX}:{filter.document_id}:*" ) await self._redis_delete(keys) - logging.info(f"Deleted document {filter.document_id} successfully") + logger.info(f"Deleted document {filter.document_id} successfully") except Exception as e: - logging.info(f"Error deleting document {filter.document_id}: {e}") + logger.error(f"Error deleting document {filter.document_id}: {e}") raise e # Delete by explicit ids (Redis keys) if ids: try: - logging.info(f"Deleting document ids {ids}") + logger.info(f"Deleting document ids {ids}") keys = [] # find all keys associated with the document ids for document_id in ids: @@ -382,10 +382,10 @@ async def delete( ) keys.extend(doc_keys) # delete all keys - logging.info(f"Deleting {len(keys)} keys from Redis") + logger.info(f"Deleting {len(keys)} keys from Redis") await self._redis_delete(keys) except Exception as e: - logging.info(f"Error deleting ids: {e}") + logger.error(f"Error deleting ids: {e}") raise e return True diff --git a/datastore/providers/supabase_datastore.py b/datastore/providers/supabase_datastore.py new file mode 100644 index 000000000..ec7395e56 --- /dev/null +++ 
b/datastore/providers/supabase_datastore.py @@ -0,0 +1,95 @@ +import os +from typing import Any, List +from datetime import datetime + +from supabase import Client + +from datastore.providers.pgvector_datastore import PGClient, PgVectorDataStore +from models.models import ( + DocumentMetadataFilter, +) + +SUPABASE_URL = os.environ.get("SUPABASE_URL") +assert SUPABASE_URL is not None, "SUPABASE_URL is not set" +SUPABASE_ANON_KEY = os.environ.get("SUPABASE_ANON_KEY") +# use service role key if you want this app to be able to bypass your Row Level Security policies +SUPABASE_SERVICE_ROLE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") +assert ( + SUPABASE_ANON_KEY is not None or SUPABASE_SERVICE_ROLE_KEY is not None +), "SUPABASE_ANON_KEY or SUPABASE_SERVICE_ROLE_KEY must be set" + + +# class that implements the DataStore interface for Supabase Datastore provider +class SupabaseDataStore(PgVectorDataStore): + def create_db_client(self): + return SupabaseClient() + + +class SupabaseClient(PGClient): + def __init__(self) -> None: + super().__init__() + if not SUPABASE_SERVICE_ROLE_KEY: + self.client = Client(SUPABASE_URL, SUPABASE_ANON_KEY) + else: + self.client = Client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY) + + async def upsert(self, table: str, json: dict[str, Any]): + """ + Takes in a list of documents and inserts them into the table. + """ + if "created_at" in json: + json["created_at"] = json["created_at"][0].isoformat() + + self.client.table(table).upsert(json).execute() + + async def rpc(self, function_name: str, params: dict[str, Any]): + """ + Calls a stored procedure in the database with the given parameters. + """ + if "in_start_date" in params: + params["in_start_date"] = params["in_start_date"].isoformat() + if "in_end_date" in params: + params["in_end_date"] = params["in_end_date"].isoformat() + + response = self.client.rpc(function_name, params=params).execute() + return response.data + + async def delete_like(self, table: str, column: str, pattern: str): + """ + Deletes rows in the table that match the pattern. + """ + self.client.table(table).delete().like(column, pattern).execute() + + async def delete_in(self, table: str, column: str, ids: List[str]): + """ + Deletes rows in the table that match the ids. + """ + self.client.table(table).delete().in_(column, ids).execute() + + async def delete_by_filters(self, table: str, filter: DocumentMetadataFilter): + """ + Deletes rows in the table that match the filter. 
+ """ + builder = self.client.table(table).delete() + if filter.document_id: + builder = builder.eq( + "document_id", + filter.document_id, + ) + if filter.source: + builder = builder.eq("source", filter.source) + if filter.source_id: + builder = builder.eq("source_id", filter.source_id) + if filter.author: + builder = builder.eq("author", filter.author) + if filter.start_date: + builder = builder.gte( + "created_at", + filter.start_date[0].isoformat(), + ) + if filter.end_date: + builder = builder.lte( + "created_at", + filter.end_date[0].isoformat(), + ) + builder.execute() diff --git a/datastore/providers/weaviate_datastore.py b/datastore/providers/weaviate_datastore.py index 9202835e5..fe3ae3b56 100644 --- a/datastore/providers/weaviate_datastore.py +++ b/datastore/providers/weaviate_datastore.py @@ -97,7 +97,7 @@ def handle_errors(self, results: Optional[List[dict]]) -> List[str]: continue for message in result["result"]["errors"]["error"]: error_messages.append(message["message"]) - logger.exception(message["message"]) + logger.error(message["message"]) return error_messages @@ -245,11 +245,11 @@ async def _single_query(query: QueryWithEmbedding) -> QueryResult: result = DocumentChunkWithScore( id=resp["chunk_id"], text=resp["text"], - embedding=resp["_additional"]["vector"], + # embedding=resp["_additional"]["vector"], score=resp["_additional"]["score"], metadata=DocumentChunkMetadata( document_id=resp["document_id"] if resp["document_id"] else "", - source=Source(resp["source"]), + source=Source(resp["source"]) if resp["source"] else None, source_id=resp["source_id"], url=resp["url"], created_at=resp["created_at"], diff --git a/datastore/providers/zilliz_datastore.py b/datastore/providers/zilliz_datastore.py index 1db641f63..81f151c43 100644 --- a/datastore/providers/zilliz_datastore.py +++ b/datastore/providers/zilliz_datastore.py @@ -1,5 +1,6 @@ import os +from loguru import logger from typing import Optional from pymilvus import ( connections, @@ -47,7 +48,7 @@ def _create_connection(self): # Connect to the Zilliz instance using the passed in Environment variables self.alias = uuid4().hex connections.connect(alias=self.alias, uri=ZILLIZ_URI, user=ZILLIZ_USER, password=ZILLIZ_PASSWORD, secure=ZILLIZ_USE_SECURITY) # type: ignore - self._print_info("Connect to zilliz cloud server") + logger.info("Connect to zilliz cloud server") def _create_index(self): try: @@ -59,6 +60,6 @@ def _create_index(self): self.col.load() self.search_params = {"metric_type": "IP", "params": {}} except Exception as e: - self._print_err("Failed to create index, error: {}".format(e)) + logger.error("Failed to create index, error: {}".format(e)) diff --git a/docs/deployment/removing-unused-dependencies.md b/docs/deployment/removing-unused-dependencies.md index dcdac20c7..44a56c630 100644 --- a/docs/deployment/removing-unused-dependencies.md +++ b/docs/deployment/removing-unused-dependencies.md @@ -4,14 +4,17 @@ Before deploying your app, you might want to remove unused dependencies from you Here are the packages you can remove for each vector database provider: -- **Pinecone:** Remove `weaviate-client`, `pymilvus`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity` and `azure-search-documents`. -- **Weaviate:** Remove `pinecone-client`, `pymilvus`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity` and `azure-search-documents`. 
-- **Zilliz:** Remove `pinecone-client`, `weaviate-client`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity` and `azure-search-documents`.
-- **Milvus:** Remove `pinecone-client`, `weaviate-client`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity` and `azure-search-documents`.
-- **Qdrant:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `redis`, `chromadb`, `llama-index`, `azure-identity` and `azure-search-documents`.
-- **Redis:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `chromadb`, `llama-index`, `azure-identity` and `azure-search-documents`.
-- **LlamaIndex:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `chromadb`, `redis`, `azure-identity` and `azure-search-documents`.
-- **Chroma:**: Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `llama-index`, `redis`, `azure-identity` and `azure-search-documents`.
-- **Azure Cognitive Search**: Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `llama-index`, `redis` and `chromadb`.
+- **Pinecone:** Remove `weaviate-client`, `pymilvus`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Weaviate:** Remove `pinecone-client`, `pymilvus`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Zilliz:** Remove `pinecone-client`, `weaviate-client`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Milvus:** Remove `pinecone-client`, `weaviate-client`, `qdrant-client`, `redis`, `chromadb`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Qdrant:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `redis`, `chromadb`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Redis:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `chromadb`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **LlamaIndex:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `chromadb`, `redis`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Chroma:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `llama-index`, `redis`, `azure-identity`, `azure-search-documents`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Azure Cognitive Search:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `llama-index`, `redis`, `chromadb`, `supabase`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Supabase:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `redis`, `llama-index`, `azure-identity`, `azure-search-documents`, `psycopg2`+`pgvector`, and `psycopg2cffi`.
+- **Postgres:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `redis`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, and `psycopg2cffi`.
+- **AnalyticDB:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, `qdrant-client`, `redis`, `llama-index`, `azure-identity`, `azure-search-documents`, `supabase`, and `psycopg2`+`pgvector`.
 
 After removing the unnecessary packages from the `pyproject.toml` file, you don't need to run `poetry lock` and `poetry install` manually. The provided Dockerfile takes care of installing the required dependencies using the `requirements.txt` file generated by the `poetry export` command.
diff --git a/docs/deployment/render-thumbnail.png b/docs/deployment/render-thumbnail.png
new file mode 100644
index 000000000..4bd725aae
Binary files /dev/null and b/docs/deployment/render-thumbnail.png differ
diff --git a/docs/deployment/render.md b/docs/deployment/render.md
new file mode 100644
index 000000000..741f55538
--- /dev/null
+++ b/docs/deployment/render.md
@@ -0,0 +1,19 @@
+# Deploying to Render
+
+## Removing Unused Dependencies
+
+Before deploying your app, you might want to remove unused dependencies from your [pyproject.toml](/pyproject.toml) file to reduce the size of your app and improve its performance. Depending on the vector database provider you choose, you can remove the packages that are not needed for your specific provider.
+
+Find the packages you can remove for each vector database provider [here](removing-unused-dependencies.md).
+
+After removing the unnecessary packages from the `pyproject.toml` file, you don't need to run `poetry lock` and `poetry install` manually. The provided Dockerfile takes care of installing the required dependencies using the `requirements.txt` file generated by the `poetry export` command.
+
+## Deployment
+
+Render maintains a [fork](https://github.com/render-examples/chatgpt-retrieval-plugin/) of this repository with a few small changes that facilitate easy deployment. The source code is unchanged. To deploy both the Docker container from this repository and a self-hosted Weaviate database to back it, just click the button below. Enter your OpenAI API key when prompted.
+
+[Deploy to Render](https://render.com/deploy?repo=https://github.com/render-examples/chatgpt-retrieval-plugin/tree/main)
+
+The bearer token will be randomly generated for you. You can view it in the "Environment" tab on the [Render dashboard](https://dashboard.render.com) page for your server. For more guidance, consult the [README in Render's fork](https://github.com/render-examples/chatgpt-retrieval-plugin/blob/main/README.md), [Render's documentation](https://render.com/docs), or the screen recording linked below.
+
+[![Deploy to Render screen recording](render-thumbnail.png)](https://vimeo.com/823610578)
diff --git a/docs/providers/analyticdb/setup.md b/docs/providers/analyticdb/setup.md
new file mode 100644
index 000000000..8aed01f44
--- /dev/null
+++ b/docs/providers/analyticdb/setup.md
@@ -0,0 +1,82 @@
+# AnalyticDB
+
+[AnalyticDB](https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/latest/product-introduction-overview) is a distributed cloud-native vector database designed for storing documents and vector embeddings. It is a high-performance vector database that is fully compatible with PostgreSQL syntax, making it easy to use. Managed by Alibaba Cloud, AnalyticDB offers a powerful vector compute engine, processing billions of data vectors and providing a wide range of features, including indexing algorithms, structured and unstructured data capabilities, real-time updates, distance metrics, scalar filtering, and time travel searches.
Additionally, it offers full OLAP database functionality and an SLA commitment for production use.
+
+## Install Requirements
+
+Run the following command to install the required packages, including the `psycopg2cffi` package:
+
+```
+poetry install --extras "postgresql"
+```
+
+If you encounter the `Error: pg_config executable not found.` issue, you need to install the PostgreSQL development package on your system. Follow the instructions for your specific Linux distribution:
+
+1. Debian-based systems (e.g., Ubuntu):
+
+```bash
+sudo apt-get update
+sudo apt-get install libpq-dev
+```
+
+2. RHEL-based systems (e.g., CentOS, Fedora):
+
+```bash
+sudo yum install postgresql-devel
+```
+
+3. Arch-based systems (e.g., Manjaro, Arch Linux):
+
+```bash
+sudo pacman -S postgresql-libs
+```
+
+4. macOS:
+
+```bash
+brew install postgresql
+```
+
+After installing the required package, try to install `psycopg2cffi` again. If the `pg_config` executable is still not found, add its location to your system's `PATH` variable. You can typically find the `pg_config` executable in the `bin` directory of your PostgreSQL installation, for example `/usr/pgsql-13/bin/pg_config`. To add it to your `PATH` variable, use the following command (replace the path with the correct one for your system):
+
+```bash
+export PATH=$PATH:/usr/pgsql-13/bin
+```
+
+Now, try installing `psycopg2cffi` again using Poetry.
+
+**Environment Variables:**
+
+| Name             | Required | Description                         | Default           |
+| ---------------- | -------- | ----------------------------------- | ----------------- |
+| `DATASTORE`      | Yes      | Datastore name, set to `analyticdb` |                   |
+| `BEARER_TOKEN`   | Yes      | Secret token                        |                   |
+| `OPENAI_API_KEY` | Yes      | OpenAI API key                      |                   |
+| `PG_HOST`        | Yes      | AnalyticDB instance URL             | `localhost`       |
+| `PG_USER`        | Yes      | Database user                       | `user`            |
+| `PG_PASSWORD`    | Yes      | Database password                   | `password`        |
+| `PG_PORT`        | Optional | Port for AnalyticDB communication   | `5432`            |
+| `PG_DATABASE`    | Optional | Database name                       | `postgres`        |
+| `PG_COLLECTION`  | Optional | AnalyticDB relation name            | `document_chunks` |
+
+## AnalyticDB Cloud
+
+For a hosted [AnalyticDB Cloud](https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/latest/product-introduction-overview) version, provide the AnalyticDB instance URL:
+
+**Example:**
+
+```bash
+PG_HOST="https://YOUR-CLUSTER-URL.gpdb.rds.aliyuncs.com"
+PG_USER="YOUR-USER-NAME"
+PG_PASSWORD="YOUR-PASSWORD"
+```
+
+The other parameters are optional and can be changed if needed.
+
+## Running AnalyticDB Integration Tests
+
+A suite of integration tests verifies the AnalyticDB integration. Launch the test suite with this command:
+
+```bash
+pytest ./tests/datastore/providers/analyticdb/test_analyticdb_datastore.py
+```
diff --git a/docs/providers/postgres/setup.md b/docs/providers/postgres/setup.md
new file mode 100644
index 000000000..25c64e4cc
--- /dev/null
+++ b/docs/providers/postgres/setup.md
@@ -0,0 +1,81 @@
+# Postgres
+
+Postgres offers an easy and efficient way to store vectors via the [pgvector](https://github.com/pgvector/pgvector) extension. To use pgvector, you will need to set up a PostgreSQL database with the pgvector extension enabled, or use a managed solution that provides pgvector. For a hosted/managed solution, you can use any of the cloud vendors that support [pgvector](https://github.com/pgvector/pgvector#hosted-postgres).
+
+- The database needs the `pgvector` extension.
+- To apply the required migrations, you can use any tool you are familiar with, such as [pgAdmin](https://www.pgadmin.org/), [DBeaver](https://dbeaver.io/), [DataGrip](https://www.jetbrains.com/datagrip/), or the `psql` CLI.
+
+**Retrieval App Environment Variables**
+
+| Name             | Required | Description                             |
+| ---------------- | -------- | --------------------------------------- |
+| `DATASTORE`      | Yes      | Datastore name. Set this to `postgres`  |
+| `BEARER_TOKEN`   | Yes      | Your secret token                       |
+| `OPENAI_API_KEY` | Yes      | Your OpenAI API key                     |
+
+**Postgres Datastore Environment Variables**
+
+| Name          | Required | Description       | Default     |
+| ------------- | -------- | ----------------- | ----------- |
+| `PG_HOST`     | Optional | Postgres host     | `localhost` |
+| `PG_PORT`     | Optional | Postgres port     | `5432`      |
+| `PG_PASSWORD` | Optional | Postgres password | `postgres`  |
+| `PG_USER`     | Optional | Postgres username | `postgres`  |
+| `PG_DB`       | Optional | Postgres database | `postgres`  |
+
+## Postgres Datastore local development & testing
+
+In order to test your changes to the Postgres Datastore, you can run the following:
+
+1. Run a local or self-hosted instance of PostgreSQL with `pgvector` enabled using Docker:
+
+```bash
+docker pull ankane/pgvector
+```
+
+```bash
+docker run --name pgvector -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -d ankane/pgvector
+```
+
+Check the PostgreSQL [official Docker image](https://github.com/docker-library/docs/blob/master/postgres/README.md) for more options.
+
+2. Apply the migrations using any tool you like, such as [pgAdmin](https://www.pgadmin.org/), [DBeaver](https://dbeaver.io/), [DataGrip](https://www.jetbrains.com/datagrip/), or the `psql` CLI.
+
+```bash
+# apply migrations using psql cli
+psql -h localhost -p 5432 -U postgres -d postgres -f examples/providers/supabase/migrations/20230414142107_init_pg_vector.sql
+```
+
+3. Export the environment variables required for the Postgres Datastore
+
+```bash
+export PG_HOST=localhost
+export PG_PORT=5432
+export PG_PASSWORD=mysecretpassword
+```
+
+4. Run the Postgres datastore tests from the project's root directory
+
+```bash
+# Run the Postgres datastore tests
+# go to project's root directory and run
+poetry run pytest -s ./tests/datastore/providers/postgres/test_postgres_datastore.py
+```
+
+5. When going to production, don't forget to set the password for the `postgres` user to something more secure and to apply the migrations.
+
+6. You may want to remove RLS (Row Level Security) from the `documents` table, since it is not required in this setup. RLS is useful if you want to separate documents by user or group of users, or to grant different users permission to insert or query documents, and it is especially important if you plan to use PostgREST. To remove it, delete the following statement from the `20230414142107_init_pg_vector.sql` migration file: `alter table documents enable row level security;`.
+
+## Indexes for Postgres
+
+By default, pgvector performs exact nearest neighbor search. To speed up the vector comparison, you may want to create indexes for the `embedding` column in the `documents` table. You should do this **only** after a few thousand records are inserted.
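+
+Before creating the index, it can help to confirm that the extension and the data are in place. Below is a minimal sanity check, a sketch that assumes the `documents` table created by the migration above:
+
+```sql
+-- Confirm the pgvector extension is installed
+select extname, extversion from pg_extension where extname = 'vector';
+
+-- Count the rows; create the index only once this reaches a few thousand
+select count(*) from documents;
+```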
+
+As the datastore uses inner product for similarity search, you can add an index as follows:
+
+```sql
+create index on documents using ivfflat (embedding vector_ip_ops) with (lists = 100);
+```
+
+To choose the `lists` constant, a good place to start is `records / 1000` for up to 1M records and `sqrt(records)` for over 1M records.
+
+For more information about indexes, see [pgvector docs](https://github.com/pgvector/pgvector#indexing).
diff --git a/docs/providers/supabase/setup.md b/docs/providers/supabase/setup.md
new file mode 100644
index 000000000..8d2f05a73
--- /dev/null
+++ b/docs/providers/supabase/setup.md
@@ -0,0 +1,87 @@
+# Supabase
+
+[Supabase](https://supabase.com/blog/openai-embeddings-postgres-vector) offers an easy and efficient way to store vectors via the [pgvector](https://github.com/pgvector/pgvector) extension for Postgres. You can use the [Supabase CLI](https://github.com/supabase/cli) to set up a whole Supabase stack locally or in the cloud, or you can also use docker-compose, k8s, and other available options. For a hosted/managed solution, try [Supabase.com](https://supabase.com/) and unlock the full power of Postgres with built-in authentication, storage, auto APIs, and Realtime features. See more helpful examples of Supabase & pgvector as a vector database [here](https://github.com/supabase-community/nextjs-openai-doc-search).
+
+- The database needs the `pgvector` extension, which is included in the [Supabase distribution of Postgres](https://github.com/supabase/postgres).
+- It is possible to provide just a Postgres connection string, and the app will add the `documents` table, the query Postgres function, and the `pgvector` extension automatically.
+- However, it is recommended to separate the migration process from the app and execute the migration script in a different pipeline, using the SQL statements from the `_init_db()` function in the [Supabase datastore provider](/datastore/providers/supabase_datastore.py).
+
+**Retrieval App Environment Variables**
+
+| Name             | Required | Description                             |
+| ---------------- | -------- | --------------------------------------- |
+| `DATASTORE`      | Yes      | Datastore name. Set this to `supabase`  |
+| `BEARER_TOKEN`   | Yes      | Your secret token                       |
+| `OPENAI_API_KEY` | Yes      | Your OpenAI API key                     |
+
+**Supabase Datastore Environment Variables**
+
+| Name                        | Required | Description                                                                 | Default |
+| --------------------------- | -------- | --------------------------------------------------------------------------- | ------- |
+| `SUPABASE_URL`              | Yes      | Supabase Project URL                                                         |         |
+| `SUPABASE_ANON_KEY`         | Optional | Supabase Project API anon key                                                |         |
+| `SUPABASE_SERVICE_ROLE_KEY` | Optional | Supabase Project API service key; used instead of the anon key if provided  |         |
+
+## Supabase Datastore local development & testing
+
+In order to test your changes to the Supabase Datastore, you can run the following commands:
+
+1. Install the [Supabase CLI](https://github.com/supabase/cli) and [Docker](https://docs.docker.com/get-docker/)
+
+2. Run the Supabase `start` command from the `examples/providers` directory. The config for the Supabase local setup, including the required migrations, is available in the `examples/providers/supabase` directory.
+
+```bash
+# Run the Supabase stack using cli in docker
+# go to examples/providers and run supabase start
+cd examples/providers
+supabase start
+```
+
+3. Supabase `start` will download the docker images and launch the Supabase stack locally. You will see output similar to this:
+
+```bash
+Applying migration 20230414142107_init_pg_vector.sql...
+Seeding data supabase/seed.sql...
+Started supabase local development setup.
+
+         API URL: http://localhost:54321
+          DB URL: postgresql://postgres:postgres@localhost:54322/postgres
+      Studio URL: http://localhost:54323
+    Inbucket URL: http://localhost:54324
+      JWT secret: super-secret-jwt-token-with-at-least-32-characters-long
+        anon key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0
+service_role key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU
+```
+
+4. Export the environment variables required for the Supabase Datastore
+
+```bash
+export SUPABASE_URL=http://localhost:54321
+export SUPABASE_SERVICE_ROLE_KEY='eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU'
+```
+
+5. Run the Supabase datastore tests from the project's root directory
+
+```bash
+# Run the Supabase datastore tests
+# go to project's root directory and run
+poetry run pytest -s ./tests/datastore/providers/supabase/test_supabase_datastore.py
+```
+
+6. When you go to prod (if cloud hosted), it is recommended to link your Supabase project with the local setup from `examples/providers/supabase`. All migrations will be synced with the cloud project after you run `supabase db push`, or you can apply the migrations from the `examples/providers/supabase/migrations` directory manually.
+
+7. You might want to add RLS policies to the `documents` table, or you can continue using it on the server side only with the service role key. In any case, you should never use the service role key on the client side.
+
+## Indexes for Postgres
+
+By default, pgvector performs exact nearest neighbor search. To speed up the vector comparison, you may want to create indexes for the `embedding` column in the `documents` table. You should do this **only** after a few thousand records are inserted.
+
+As the datastore uses inner product for similarity search, you can add an index as follows:
+
+```sql
+create index on documents using ivfflat (embedding vector_ip_ops) with (lists = 100);
+```
+
+To choose the `lists` constant, a good place to start is `records / 1000` for up to 1M records and `sqrt(records)` for over 1M records.
+
+For more information about indexes, see [pgvector docs](https://github.com/pgvector/pgvector#indexing).
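+
+As a worked example of the heuristic above (the row counts here are hypothetical): a `documents` table with about 500,000 rows would use `lists = 500` (records / 1000), while one with about 4,000,000 rows would use `lists = 2000` (sqrt(records)):
+
+```sql
+-- ~500k rows: lists = records / 1000 = 500
+create index on documents using ivfflat (embedding vector_ip_ops) with (lists = 500);
+
+-- ~4M rows: lists = sqrt(records) = 2000
+-- create index on documents using ivfflat (embedding vector_ip_ops) with (lists = 2000);
+```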
diff --git a/examples/authentication-methods/no-auth/main.py b/examples/authentication-methods/no-auth/main.py index 1fd5458b4..961c725c1 100644 --- a/examples/authentication-methods/no-auth/main.py +++ b/examples/authentication-methods/no-auth/main.py @@ -4,6 +4,7 @@ import uvicorn from fastapi import FastAPI, File, Form, HTTPException, Body, UploadFile from fastapi.staticfiles import StaticFiles +from loguru import logger from models.api import ( DeleteRequest, @@ -55,7 +56,7 @@ async def upsert_file( ids = await datastore.upsert([document]) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail=f"str({e})") @@ -70,7 +71,7 @@ async def upsert( ids = await datastore.upsert(request.documents) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -87,7 +88,7 @@ async def query_main( ) return QueryResponse(results=results) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -105,7 +106,7 @@ async def query( ) return QueryResponse(results=results) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -129,7 +130,7 @@ async def delete( ) return DeleteResponse(success=success) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") diff --git a/examples/memory/main.py b/examples/memory/main.py index 5c96e4289..c94d3f94d 100644 --- a/examples/memory/main.py +++ b/examples/memory/main.py @@ -8,6 +8,7 @@ from fastapi import FastAPI, File, Form, HTTPException, Depends, Body, UploadFile from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from fastapi.staticfiles import StaticFiles +from loguru import logger from models.api import ( DeleteRequest, @@ -71,7 +72,7 @@ async def upsert_file( ids = await datastore.upsert([document]) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail=f"str({e})") @@ -87,7 +88,7 @@ async def upsert_main( ids = await datastore.upsert(request.documents) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -105,7 +106,7 @@ async def upsert( ids = await datastore.upsert(request.documents) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -123,7 +124,7 @@ async def query_main( ) return QueryResponse(results=results) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -143,7 +144,7 @@ async def query( ) return QueryResponse(results=results) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -168,7 +169,7 @@ async def delete( ) return DeleteResponse(success=success) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") diff --git a/examples/providers/supabase/.gitignore b/examples/providers/supabase/.gitignore new file mode 100644 index 000000000..773c7c3e0 --- /dev/null +++ 
b/examples/providers/supabase/.gitignore @@ -0,0 +1,3 @@ +# Supabase +.branches +.temp diff --git a/examples/providers/supabase/config.toml b/examples/providers/supabase/config.toml new file mode 100644 index 000000000..921313039 --- /dev/null +++ b/examples/providers/supabase/config.toml @@ -0,0 +1,72 @@ +# A string used to distinguish different Supabase projects on the same host. Defaults to the working +# directory name when running `supabase init`. +project_id = "providers" + +[api] +# Port to use for the API URL. +port = 54321 +# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API +# endpoints. public and storage are always included. +schemas = ["public", "storage", "graphql_public"] +# Extra schemas to add to the search_path of every request. public is always included. +extra_search_path = ["public", "extensions"] +# The maximum number of rows returns from a view, table, or stored procedure. Limits payload size +# for accidental or malicious requests. +max_rows = 1000 + +[db] +# Port to use for the local database URL. +port = 54322 +# The database major version to use. This has to be the same as your remote database's. Run `SHOW +# server_version;` on the remote database to check. +major_version = 15 + +[studio] +# Port to use for Supabase Studio. +port = 54323 + +# Email testing server. Emails sent with the local dev setup are not actually sent - rather, they +# are monitored, and you can view the emails that would have been sent from the web interface. +[inbucket] +# Port to use for the email testing server web interface. +port = 54324 +smtp_port = 54325 +pop3_port = 54326 + +[storage] +# The maximum file size allowed (e.g. "5MB", "500KB"). +file_size_limit = "50MiB" + +[auth] +# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used +# in emails. +site_url = "http://localhost:3000" +# A list of *exact* URLs that auth providers are permitted to redirect to post authentication. +additional_redirect_urls = ["https://localhost:3000"] +# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 seconds (one +# week). +jwt_expiry = 3600 +# Allow/disallow new user signups to your project. +enable_signup = true + +[auth.email] +# Allow/disallow new user signups via email to your project. +enable_signup = true +# If enabled, a user will be required to confirm any email change on both the old, and new email +# addresses. If disabled, only the new email is required to confirm. +double_confirm_changes = true +# If enabled, users need to confirm their email address before signing in. +enable_confirmations = false + +# Use an external OAuth provider. The full list of providers are: `apple`, `azure`, `bitbucket`, +# `discord`, `facebook`, `github`, `gitlab`, `google`, `keycloak`, `linkedin`, `notion`, `twitch`, +# `twitter`, `slack`, `spotify`, `workos`, `zoom`. +[auth.external.apple] +enabled = false +client_id = "" +secret = "" +# Overrides the default auth redirectUrl. +redirect_uri = "" +# Overrides the default auth provider URL. Used to support self-hosted gitlab, single-tenant Azure, +# or any other third-party OIDC providers. 
+url = "" diff --git a/examples/providers/supabase/migrations/20230414142107_init_pg_vector.sql b/examples/providers/supabase/migrations/20230414142107_init_pg_vector.sql new file mode 100644 index 000000000..4d54797b2 --- /dev/null +++ b/examples/providers/supabase/migrations/20230414142107_init_pg_vector.sql @@ -0,0 +1,70 @@ +create extension vector; + +create table if not exists documents ( + id text primary key default gen_random_uuid()::text, + source text, + source_id text, + content text, + document_id text, + author text, + url text, + created_at timestamptz default now(), + embedding vector(1536) +); + +create index ix_documents_document_id on documents using btree ( document_id ); +create index ix_documents_source on documents using btree ( source ); +create index ix_documents_source_id on documents using btree ( source_id ); +create index ix_documents_author on documents using btree ( author ); +create index ix_documents_created_at on documents using brin ( created_at ); + +alter table documents enable row level security; + +create or replace function match_page_sections(in_embedding vector(1536) + , in_match_count int default 3 + , in_document_id text default '%%' + , in_source_id text default '%%' + , in_source text default '%%' + , in_author text default '%%' + , in_start_date timestamptz default '-infinity' + , in_end_date timestamptz default 'infinity') +returns table (id text + , source text + , source_id text + , document_id text + , url text + , created_at timestamptz + , author text + , content text + , embedding vector(1536) + , similarity float) +language plpgsql +as $$ +#variable_conflict use_variable +begin +return query +select + documents.id, + documents.source, + documents.source_id, + documents.document_id, + documents.url, + documents.created_at, + documents.author, + documents.content, + documents.embedding, + (documents.embedding <#> in_embedding) * -1 as similarity +from documents + +where in_start_date <= documents.created_at and + documents.created_at <= in_end_date and + (documents.source_id like in_source_id or documents.source_id is null) and + (documents.source like in_source or documents.source is null) and + (documents.author like in_author or documents.author is null) and + (documents.document_id like in_document_id or documents.document_id is null) + +order by documents.embedding <#> in_embedding + +limit in_match_count; +end; +$$; \ No newline at end of file diff --git a/examples/providers/supabase/seed.sql b/examples/providers/supabase/seed.sql new file mode 100644 index 000000000..e69de29bb diff --git a/local-server/ai-plugin.json b/local_server/ai-plugin.json similarity index 100% rename from local-server/ai-plugin.json rename to local_server/ai-plugin.json diff --git a/local-server/logo.png b/local_server/logo.png similarity index 100% rename from local-server/logo.png rename to local_server/logo.png diff --git a/local-server/main.py b/local_server/main.py similarity index 87% rename from local-server/main.py rename to local_server/main.py index ce274b5a2..81506fd2b 100644 --- a/local-server/main.py +++ b/local_server/main.py @@ -3,6 +3,7 @@ from typing import Optional import uvicorn from fastapi import FastAPI, File, Form, HTTPException, Body, UploadFile +from loguru import logger from models.api import ( DeleteRequest, @@ -41,19 +42,21 @@ @app.route("/.well-known/ai-plugin.json") async def get_manifest(request): - file_path = "./local-server/ai-plugin.json" - return FileResponse(file_path, media_type="text/json") + file_path = 
"./local_server/ai-plugin.json" + simple_headers = {} + simple_headers["Access-Control-Allow-Private-Network"] = "true" + return FileResponse(file_path, media_type="text/json", headers=simple_headers) @app.route("/.well-known/logo.png") async def get_logo(request): - file_path = "./local-server/logo.png" + file_path = "./local_server/logo.png" return FileResponse(file_path, media_type="text/json") @app.route("/.well-known/openapi.yaml") async def get_openapi(request): - file_path = "./local-server/openapi.yaml" + file_path = "./local_server/openapi.yaml" return FileResponse(file_path, media_type="text/json") @@ -80,7 +83,7 @@ async def upsert_file( ids = await datastore.upsert([document]) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail=f"str({e})") @@ -95,7 +98,7 @@ async def upsert( ids = await datastore.upsert(request.documents) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -107,7 +110,7 @@ async def query_main(request: QueryRequest = Body(...)): ) return QueryResponse(results=results) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -131,7 +134,7 @@ async def delete( ) return DeleteResponse(success=success) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -142,4 +145,4 @@ async def startup(): def start(): - uvicorn.run("local-server.main:app", host="localhost", port=PORT, reload=True) + uvicorn.run("local_server.main:app", host="localhost", port=PORT, reload=True) diff --git a/local-server/openapi.yaml b/local_server/openapi.yaml similarity index 100% rename from local-server/openapi.yaml rename to local_server/openapi.yaml diff --git a/poetry.lock b/poetry.lock index d119e11aa..55cf9e55c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -240,20 +240,20 @@ aio = ["aiohttp (>=3.0)"] [[package]] name = "azure-identity" -version = "1.12.0" +version = "1.13.0" description = "Microsoft Azure Identity Library for Python" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "azure-identity-1.12.0.zip", hash = "sha256:7f9b1ae7d97ea7af3f38dd09305e19ab81a1e16ab66ea186b6579d85c1ca2347"}, - {file = "azure_identity-1.12.0-py3-none-any.whl", hash = "sha256:2a58ce4a209a013e37eaccfd5937570ab99e9118b3e1acf875eed3a85d541b92"}, + {file = "azure-identity-1.13.0.zip", hash = "sha256:c931c27301ffa86b07b4dcf574e29da73e3deba9ab5d1fe4f445bb6a3117e260"}, + {file = "azure_identity-1.13.0-py3-none-any.whl", hash = "sha256:bd700cebb80cd9862098587c29d8677e819beca33c62568ced6d5a8e5e332b82"}, ] [package.dependencies] azure-core = ">=1.11.0,<2.0.0" cryptography = ">=2.5" -msal = ">=1.12.0,<2.0.0" +msal = ">=1.20.0,<2.0.0" msal-extensions = ">=0.3.0,<2.0.0" six = ">=1.12.0" @@ -291,6 +291,25 @@ files = [ {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, ] +[[package]] +name = "bleach" +version = "6.0.0" +description = "An easy safelist-based HTML-sanitizing tool." 
+category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "bleach-6.0.0-py3-none-any.whl", hash = "sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4"}, + {file = "bleach-6.0.0.tar.gz", hash = "sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414"}, +] + +[package.dependencies] +six = ">=1.9.0" +webencodings = "*" + +[package.extras] +css = ["tinycss2 (>=1.1.0,<1.2)"] + [[package]] name = "blobfile" version = "2.0.2" @@ -523,79 +542,94 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "click-log" +version = "0.4.0" +description = "Logging integration for Click" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "click-log-0.4.0.tar.gz", hash = "sha256:3970f8570ac54491237bcdb3d8ab5e3eef6c057df29f8c3d1151a51a9c23b975"}, + {file = "click_log-0.4.0-py2.py3-none-any.whl", hash = "sha256:a43e394b528d52112af599f2fc9e4b7cf3c15f94e53581f74fa6867e68c91756"}, +] + +[package.dependencies] +click = "*" + [[package]] name = "clickhouse-connect" -version = "0.5.23" +version = "0.5.24" description = "ClickHouse core driver, SqlAlchemy, and Superset libraries" category = "main" optional = false python-versions = "~=3.7" files = [ - {file = "clickhouse-connect-0.5.23.tar.gz", hash = "sha256:d4c48f2b05807720a638df4e8f8e71d45a6eb548c6183b44782270631a34b849"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df35ab8af6d46cce12552d7bd25fe657d3ad8c8b4956a1067441c25c1e63cc61"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:addb0a933821b68984dff82345846a6c5fd161e471cfdfc22933d3c33dafe545"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:181670af78bd4186d8b250ad25a2afedf4a50a938e2f10dc945b291d7956f6bc"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e1ac16086a3f247943aea2fceaec6fddcfaf00f87376dede2199ca26a41aa28"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f8de0e470329eb1f26c9a3feba38e13732ca91b11f414f76c91d52ff4a5ff4f8"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e68dad117e7b27ae3ba30ae7b5c8a8ce9a7f34703aea04e18dba5b09e353f38d"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2d8aadd1508d9acb2f7a604662aa6987efb87ea9e51cd2e3413d61c69f1fdd50"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a8f3887c2422ee28512aa944e3bc642c9dd6c2749d8c204c25a90989bf4a430"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-win32.whl", hash = "sha256:d1909c6359855dd42225709672bb61e3fbc2c8ab5e6cbb48582136df3d57d216"}, - {file = "clickhouse_connect-0.5.23-cp310-cp310-win_amd64.whl", hash = "sha256:95eff3b153a5fd810f54dfc1b3043a86218db0a31f17c926eacca262a269cfac"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72d75a02a1caa3fdb5edbab988e4d3abfcf2f380df3ea68275047c40f39e562e"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bc23c24b2aabdd81366c9a90c6262ecfcb64b22e16b5704d29dc648c732ac8fd"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:99f0f72bccbf55578ab34c7ad9a76f803db1eb778ed7ca0e4f4b867c75a41c16"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:842150ff73575cfe9cc295ccb4c205ec5a8907c3290d2a99460427993c7a6a03"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3703d8ff05b2d9ff30bb1fc4eaff5349cc1017a193e6d5746fc35393d2f80899"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af3f83a85341bd5d8525533c83a9dfe64676df7e0ede517639b96746ce50a57d"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:58f95eff87c4d437f39b8192532fcf4e05dee2601b6313e2ad2c4913bef312e2"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db6b9ad563d5fbc1729249fed60c657f4844eb27105241f5142dae77c007641a"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-win32.whl", hash = "sha256:ca9ebf6d5a86e336f61c0b9364fb9ae944b6b38195d3b92db2bf712a50f1c213"}, - {file = "clickhouse_connect-0.5.23-cp311-cp311-win_amd64.whl", hash = "sha256:e432a51dd58e0c0921e51df53f33ec0c8740109bb84b65beb0b1702914356a0e"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76927edeab12e2539e41e64b8b8923fe636e9a52a6edf58f4c588e5948d0c2bd"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97902396d33a4b02a9515b37fb56e4dac392f2421e1e53874524a3829a861595"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef8edec9769656346e74f02948f9565d9f3ba0ca1021d879fb410e30a54f8478"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:acb72211fad7ea02d16baca6a57a184142b58e8604de32faf4cdb19e18930110"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d14331736b32c14a8d432ea4785c3a56c12c669ecb0c4049b3a2567a3d10ae18"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:df465815aba293ed2f96885491c900ab34a2a6b98ff9b446d777938802ac127d"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1245ad0cb77871b3f3bc87d55bbbc8137125cb71a073d4a522f12707fed1f3a3"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-win32.whl", hash = "sha256:e939f6429784d175863bbf321f781510aa165b01e5442a7fb7fa7885071d1846"}, - {file = "clickhouse_connect-0.5.23-cp37-cp37m-win_amd64.whl", hash = "sha256:79f5863afcbad0b9bd37a080be4a710c3562a658d474df4e07be29b33c324cc0"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7652dd19953cffd51c75cdfef70c763d6dc631af89c4988c29d20e129f391b10"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:596012ec214b5f4b07e148686669e2be10b83522cd7c72cdb38f8b9bcd181d62"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c355f7a203c77aad56cc9c73f1d570446e4ed5bb57b3c90e8c74956bf7c305"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67f14f1d6eaa65a48e1170407b1cedd9cdc3b90506c090ea67ba4c4307e1cb"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6ec68ed44ab02ce4d97998bdd215efca5e863dab0819427057bb5ec0e97ca8a6"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:602fcac024b6314f96f39b3ff0fb1aa54bfe9d2de1d3958ddcedb19616129535"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3c636c9154d2d9b57188b20ec42524e2258029874442259a605171a420ca0d52"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:46fd5427353690b68cb41e808919819f376ab503297fbfc59b51506181cd4410"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-win32.whl", hash = "sha256:6594d3189f4f67e49775aec12ef593052b056aa338d0b7a27905a1176979ae5f"}, - {file = "clickhouse_connect-0.5.23-cp38-cp38-win_amd64.whl", hash = "sha256:69cd6eb7c220cc55f3e9ff6391f7d02fa789740983863acd7e68bdd3956163c6"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1afd4089296678ed20d38e100b121ea9caa05a24f9cdc66b72ded8ed06d705b8"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:381279e5382086ae07e988770e38b601342165a577c606589616315324545135"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cbf47331b7c220a558be6c6667599f53fc0070c0abdcc27d7bbc215b56da61"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a924dd889c7c740f7cd8cbb7f4d3fa3e7f794400d50adaa1190f1cd8786f238"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:37069cf6aa4fd18e7daaa7be52afebe51ae57ebdc001d1c0c1784bfa474cebda"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c3a29c3473f1c0c60a8639c48487c0ba220636c79a1f37a7fdb1a3ee2392b8a1"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:52c469867ce021a1b0e831a0c2544062ecc6df4cae93f51895c993c09e9092ee"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:da233d6584109ee96b1e35137ac5866d38a1ec3b62b36cd0ed05d573729f5234"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-win32.whl", hash = "sha256:5cb9564f0634f3a1f78ba122c7d05288b46a4c2fc464b378e138b8c7d02f188a"}, - {file = "clickhouse_connect-0.5.23-cp39-cp39-win_amd64.whl", hash = "sha256:22ba7629e2f96b22c41ea018386d1295a379d993c9a8db8721f0fd1c3903e2c0"}, - {file = "clickhouse_connect-0.5.23-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0af8c5162c7c0dbf0be46d0ac5ab32a2e30b9e2581c0c90b51abcbac7a1959d9"}, - {file = "clickhouse_connect-0.5.23-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4624e496b709d55ae76f469040f5ea98a281045adf4bd7f717812a3273fcffa"}, - {file = "clickhouse_connect-0.5.23-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5b812c30283d54e09cb66d33a69352e06a520a0798c51ea2c9b164c9328880b"}, - {file = "clickhouse_connect-0.5.23-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0ff4b37b994fb82e525e4c0ac3e645964efb8ee78499dfbf4bbe7cda4299399"}, - {file = "clickhouse_connect-0.5.23-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:7d595f2b6c7dbbc20f78c0191bb14b186b049069a1249460e912514601818bd8"}, - {file = "clickhouse_connect-0.5.23-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:944ec9e9f7bf5cdb35f077d641dd5dd2ce4fd33ca032c3908f1cccba1a54ae31"}, - {file = 
"clickhouse_connect-0.5.23-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e59893f62fa538cbcdd61793672ecf6866f2842212f19efecf1d117c0ccd1460"}, - {file = "clickhouse_connect-0.5.23-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f02750cd9d1a9c5a83a94d347caef9cf15109574e374dddf3e541ab4f9272ea"}, - {file = "clickhouse_connect-0.5.23-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:21c5909075514df1cee78d7b854426df15af0a6fc7f6f24626f9d0b928a7a5a0"}, - {file = "clickhouse_connect-0.5.23-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b289d8fc4ed0a71821f23c10acc10f013c7bed96e8fb6ee81ad8f5a6c8fe0ee2"}, - {file = "clickhouse_connect-0.5.23-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f27bcc8709799d35a8e5b0050db746883cf94077b7f5f31f4fb8d86096cf272c"}, - {file = "clickhouse_connect-0.5.23-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60c659f95364617dec828c5fd64b1ec24712bc1b81dab104b118ea2802c8ff70"}, - {file = "clickhouse_connect-0.5.23-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:520cd348dde41c3f43e3ce035c5db7d18e0a0b810d8d1cd08b21e2cc6abb7928"}, - {file = "clickhouse_connect-0.5.23-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4aa370480010ab4b24a28b07533d123e99ca789369c5e8a3cff9bd96cdcea56e"}, - {file = "clickhouse_connect-0.5.23-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:90b3dfcdd92cc42bb14a3f2d98ddfee772c0951bcc2443a178d84c27c9850d30"}, + {file = "clickhouse-connect-0.5.24.tar.gz", hash = "sha256:f1c6a4a20c19612eedaf1cea82e532010942cb08a29326db74cce0ea48bbe56d"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5b91584305b6133eff83e8a0436b3c48681dd44dcf8b2f5b54d558bafd30afa6"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:17f3ca231aeff7c9f316dc03cba49ea8cd1e91e0f129519f8857f0e1d9aa7f49"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b126b324ca9e34662bc07335f55ff51f9a5a5c5e4df97778f0a427b4bde8cfa"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c756b8f290fc68af83129d378b749e74c40560107b926ef047c098b7c95a2ad"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:486f538781d765993cc2b6f30ef8c274674b1be2c36dc03767d14feea24df566"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:67cfb63b155c36413ff301c321de09e2476a936dc784c7954a63d612ec66f1ec"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:56b004a0e001e49a2b6a022a98832b5558642299de9c808cf7b9333180f28e1b"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:68bae08ef93aa21e02c961c79f2932cc88d0682a91099ec2f007c032ab4b68e1"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-win32.whl", hash = "sha256:b7f73598f118c7466230f7149de0b4e1af992b2ac086a9200ac0011ab03ee468"}, + {file = "clickhouse_connect-0.5.24-cp310-cp310-win_amd64.whl", hash = "sha256:5b83b4c6994e43ce3192c11ac4eb84f8ac8b6317d860fc2c4ff8f8f3609b20c1"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:ed329a93171ca867df9b903b95992d9dec2e256a657e16a88d27452dfe8f064e"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9bc64de89be44c30bf036aab551da196e11ebf14502533b6e2a0e8ca60c27599"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84adbe15ad0dd745aa1b2a183cf4d1573d39cdb81e9d0a2d37571805dfda4cd7"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a50f7f3756c64791fa8a4ec73f87954a6c3aa44523394ad22e13e31ba1cd9c25"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08499995addd7d0e758086622d32aa8f8fdf6dde61bedb106f453191b16af15f"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d8607c4b388a46b312fd34cdd26fe958002e414c0320aad0e24ac93854191325"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f0adcfbda306a1aa9f3cdc2f638b36c748c68104be97d9dc935c130ad632be82"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2abee0170d60d0621f7feec6b1e9c7434e3bb23a7b025d32a513f2df969b9a2d"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-win32.whl", hash = "sha256:d6f7ea32b46a5fafa49a85b94b18902af38b0910f34ac588ec95b5b66faf7855"}, + {file = "clickhouse_connect-0.5.24-cp311-cp311-win_amd64.whl", hash = "sha256:f0ae6e14f526c5fe504103d00992bf8e0ab3359266664b327c273e16f957545d"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dc0b18678b66160ca4ca6ce7fe074188975546c5d196092ef06510eb16067964"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91a6d666c4c3f4dea7bca84098a4624102cb3efa7f882352e8b914238b0ab3b0"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1732ea5fddf201425baf53d1434516c1242184139d61202f885575cb8742167c"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be9c23721caacc52e9f75ba2239a5ca5bbdbafa913d36bcddf9eaf33578ba937"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b9aee9588b863ab3d33c11e9d2f350cee1f17753db74cedd3eb2bb4fc5ed31d1"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7158f70e5ba787f64f01098fa729942d1d4dfd1a46c4519aab10ed3a4b32ead"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6684d253580c2e9cbcab8322189ca66fafc27ccabf67da58f178b31a09ecb60f"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-win32.whl", hash = "sha256:ba015b5337ecab0e9064eed3966acd2fe2c10f0391fc5f28d8c0fd73802d0810"}, + {file = "clickhouse_connect-0.5.24-cp37-cp37m-win_amd64.whl", hash = "sha256:34feb3cb81298beff8e2be233719cf1271fd0f1aca2a0ae5dfff9716f9ab94c1"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5ae2551daec4731373bffc6bc9d3e30a5dfbc0bdceb66cbc93c56dd0797c0740"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2cf26c82f3bd03e3088251f249776285a01da3268936d88d98b7cbecb2783497"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0437c44d342edada639fed6f5064226cc9ad9f37406ea1cf550a50cb3f66db5a"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e7b5f68b7bae44ec5dfc80510bb81f9f2af88662681c103d5a58da170f4eb78"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc0ccf9ef68377291aba32dc7754b8aab658c2b4cfe06488140114f8abbef819"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1e9c3f146bdb1929223ebba04610ebf7bbbed313ee452754268c546966eff9db"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f7e31461ce8e13e2b9f67b21e2ac7bd1121420d85bf6dc888082dfd2f6ca9bc4"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7b9b5a24cad361845f1d138ba9fb45f690c84583ca584adac76379a65fd8c00"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-win32.whl", hash = "sha256:7d223477041ae31b62917b5f9abeaa468fe2a1efa8391070da4258a41fdc7643"}, + {file = "clickhouse_connect-0.5.24-cp38-cp38-win_amd64.whl", hash = "sha256:c82fcf42d9a2318cf53086147376c31246e3842b73a09b4bac16a6f0c299a294"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:586d7193ece84ddc2608fdc29cd10cc80eff26f283b2ad9d738bbd522f1f84cd"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:71b452bed17aee315b93944174053cd84dc5efb245d4a556a2e49b78022f7ed6"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:788722210e636bec7a870b0625999f97c3285bc19fd46763b58472ee445b67e9"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:268e3375d9a3985ea961cb1be338c1d13154b617f5eb027ace0e8670de9501ce"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:28ea9abd595d7400e3ef2842f5e9db5307133dfa24d97a8c45f71713048bad97"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:00b0ac033dc47e0409a19ff974d938006a198445980028d911a47ba05facf6cd"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:601a26ddb18e266e79b76d1672ac15ef5b6043ea17ba4c9dc3dc80130a0775d9"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:eb502ccb7c5dcb907cf4c8316f9b787e4bd3a7b65cd8cbc37b24c5e9c890a801"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-win32.whl", hash = "sha256:e6acedfd795cd1db7d89f21597389805e583f2b4ae9495cb0b89b8eda13ff6ad"}, + {file = "clickhouse_connect-0.5.24-cp39-cp39-win_amd64.whl", hash = "sha256:921d3a8a287844c031c470547c07dd5b7454c883c44f13e1d4f5b9d0896444d2"}, + {file = "clickhouse_connect-0.5.24-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ec051a1f6f3912f2f3b659d3e3c344a67f676d2d42583885b3ed8365c51753b2"}, + {file = "clickhouse_connect-0.5.24-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b116538fd7d75df991b211a3db311c158a2664301b2f5d1ffc18feb5b5da89d"}, + {file = "clickhouse_connect-0.5.24-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b116747e4b187d3aac49a51e865a4fe0c11b39775724f0d7f719b4222810a5a4"}, + {file = "clickhouse_connect-0.5.24-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4fa54e11e651979d9a4e355564d2128c6a8394d4cffda295a8188c9869ab93cc"}, + {file = "clickhouse_connect-0.5.24-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:7c17e691e27d3b2e950cb2f597f0a895eb6b9d6717e886fafae861d34ac5bbb0"}, + {file = "clickhouse_connect-0.5.24-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:34e2ae809ac1244da6fa67c4021431f9a1865d14c6df2d7fe57d22841f361497"}, + {file = "clickhouse_connect-0.5.24-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e7b2ef89e9c1c92a09988a812626f7d529acfda93f420b75e59fe2981960886"}, + {file = "clickhouse_connect-0.5.24-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6200bdf94a52847d3f10ab8675c58db9ff3e90ce6ee98bc0c49f01c74d934798"}, + {file = "clickhouse_connect-0.5.24-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4def3ee218f6fbb320fbb1c5c1bb3b23753b9e56e50759fc396ea70631dff846"}, + {file = "clickhouse_connect-0.5.24-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:378f6a6289080f0c103f17eda9f8edcabc4878eb783e6b4e596d8bf8f543244e"}, + {file = "clickhouse_connect-0.5.24-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e29389baa14a3f1db4e52b32090e1e32533496e35833514c689b190f26dfb039"}, + {file = "clickhouse_connect-0.5.24-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7418e2c6533eebf0de9f3e85f1e3b6095d1a0bf42e4fed479f92f538725ff666"}, + {file = "clickhouse_connect-0.5.24-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3f23f819f20d130daed64ba058e01336e2f5f6d4b9f576038c0b800473af1ac"}, + {file = "clickhouse_connect-0.5.24-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a846fc412475d55d7727c8a82ba1247b1b7ff0c6341a1818f99fd348ee9b1580"}, + {file = "clickhouse_connect-0.5.24-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:34afc74ea27dcb85c1929f6105c4701566f51a1216bd6648b63ccb4871906729"}, ] [package.dependencies] @@ -766,6 +800,21 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +[[package]] +name = "deprecation" +version = "2.1.0" +description = "A library to handle automated deprecations" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"}, + {file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"}, +] + +[package.dependencies] +packaging = "*" + [[package]] name = "dnspython" version = "2.3.0" @@ -787,6 +836,18 @@ idna = ["idna (>=2.1,<4.0)"] trio = ["trio (>=0.14,<0.23)"] wmi = ["wmi (>=1.5.1,<2.0.0)"] +[[package]] +name = "docutils" +version = "0.20" +description = "Docutils -- Python Documentation Utilities" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "docutils-0.20-py3-none-any.whl", hash = "sha256:a428f10de4de4774389734c986a01b4af2d802d26717108b0f1b9356862937c5"}, + {file = "docutils-0.20.tar.gz", hash = "sha256:f75a5a52fbcacd81b47e42888ad2b380748aaccfb3f13af0fe69deb759f01eb6"}, +] + [[package]] name = "docx2txt" version = "0.8" @@ -798,6 +859,18 @@ files = [ {file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"}, ] +[[package]] +name = "dotty-dict" +version = "1.3.1" 
+description = "Dictionary wrapper for quick access to deeply nested keys." +category = "main" +optional = false +python-versions = ">=3.5,<4.0" +files = [ + {file = "dotty_dict-1.3.1-py3-none-any.whl", hash = "sha256:5022d234d9922f13aa711b4950372a06a6d64cb6d6db9ba43d0ba133ebfce31f"}, + {file = "dotty_dict-1.3.1.tar.gz", hash = "sha256:4b016e03b8ae265539757a53eba24b9bfda506fb94fbce0bee843c6f05541a15"}, +] + [[package]] name = "duckdb" version = "0.7.1" @@ -1050,6 +1123,52 @@ smb = ["smbprotocol"] ssh = ["paramiko"] tqdm = ["tqdm"] +[[package]] +name = "gitdb" +version = "4.0.10" +description = "Git Object Database" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, + {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.31" +description = "GitPython is a Python library used to interact with Git repositories" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.31-py3-none-any.whl", hash = "sha256:f04893614f6aa713a60cbbe1e6a97403ef633103cdd0ef5eb6efe0deb98dbe8d"}, + {file = "GitPython-3.1.31.tar.gz", hash = "sha256:8ce3bcf69adfdf7c7d503e78fd3b1c492af782d58893b650adb2ac8912ddd573"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[[package]] +name = "gotrue" +version = "1.0.1" +description = "Python Client Library for GoTrue" +category = "main" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "gotrue-1.0.1-py3-none-any.whl", hash = "sha256:005e8bc8d7f2da87606504c9c269f2943245843e2ddefb99e583f45a8612e715"}, + {file = "gotrue-1.0.1.tar.gz", hash = "sha256:9d7e01703beb3c017bcf0461f518f93bc5a400720df3ba8c082264d405cee4d0"}, +] + +[package.dependencies] +httpx = ">=0.23.0,<0.24.0" +pydantic = ">=1.10.0,<2.0.0" + [[package]] name = "greenlet" version = "2.0.2" @@ -1453,6 +1572,26 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "importlib-metadata" +version = "6.6.0" +description = "Read metadata from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, + {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -1465,6 +1604,18 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "invoke" +version = "1.7.3" +description = "Pythonic task execution" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = 
"invoke-1.7.3-py3-none-any.whl", hash = "sha256:d9694a865764dd3fd91f25f7e9a97fb41666e822bbb00e670091e3f43933574d"}, + {file = "invoke-1.7.3.tar.gz", hash = "sha256:41b428342d466a82135d5ab37119685a989713742be46e42a3a399d685579314"}, +] + [[package]] name = "isodate" version = "0.6.1" @@ -1480,6 +1631,41 @@ files = [ [package.dependencies] six = "*" +[[package]] +name = "jaraco-classes" +version = "3.2.3" +description = "Utility functions for Python class constructs" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jaraco.classes-3.2.3-py3-none-any.whl", hash = "sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158"}, + {file = "jaraco.classes-3.2.3.tar.gz", hash = "sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a"}, +] + +[package.dependencies] +more-itertools = "*" + +[package.extras] +docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[[package]] +name = "jeepney" +version = "0.8.0" +description = "Low-level, pure Python DBus protocol wrapper." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"}, + {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"}, +] + +[package.extras] +test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"] +trio = ["async_generator", "trio"] + [[package]] name = "jinja2" version = "3.1.2" @@ -1510,16 +1696,40 @@ files = [ {file = "joblib-1.2.0.tar.gz", hash = "sha256:e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018"}, ] +[[package]] +name = "keyring" +version = "23.13.1" +description = "Store and access your passwords safely." 
+category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "keyring-23.13.1-py3-none-any.whl", hash = "sha256:771ed2a91909389ed6148631de678f82ddc73737d85a927f382a8a1b157898cd"}, + {file = "keyring-23.13.1.tar.gz", hash = "sha256:ba2e15a9b35e21908d0aaf4e0a47acc52d6ae33444df0da2b49d41a46ef6d678"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""} +"jaraco.classes" = "*" +jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""} +pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""} +SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""} + +[package.extras] +completion = ["shtab"] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + [[package]] name = "langchain" -version = "0.0.162" +version = "0.0.166" description = "Building applications with LLMs through composability" category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchain-0.0.162-py3-none-any.whl", hash = "sha256:fd499fa047a5e52233bdc7fff442be9175b40b0fa3e6edbea58c9996f6750e38"}, - {file = "langchain-0.0.162.tar.gz", hash = "sha256:90712ca5b953475140fedf019c0c84dc453b15465d460ae0ef31026f5e949024"}, + {file = "langchain-0.0.166-py3-none-any.whl", hash = "sha256:32417cc38ba211d46c3e97f29cb8124175fe46047bda14a4c634351b005acd21"}, + {file = "langchain-0.0.166.tar.gz", hash = "sha256:fb1e90eb0aeef9c574e6683586bfbfed1974e187dd8261b571cb33888c35a92e"}, ] [package.dependencies] @@ -1537,10 +1747,13 @@ tenacity = ">=8.1.0,<9.0.0" tqdm = ">=4.48.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.2.6,<0.3.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", "deeplake (>=3.3.0,<4.0.0)", "duckduckgo-search (>=2.8.6,<3.0.0)", "elasticsearch (>=8,<9)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "lark (>=1.1.5,<2.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "opensearch-py (>=2.0.0,<3.0.0)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0)", "redis (>=4,<5)", "sentence-transformers (>=2,<3)", "spacy (>=3,<4)", "tensorflow-text (>=2.11.0,<3.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.2.6,<0.3.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", 
"beautifulsoup4 (>=4,<5)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", "deeplake (>=3.3.0,<4.0.0)", "docarray (>=0.31.0,<0.32.0)", "duckduckgo-search (>=2.8.6,<3.0.0)", "elasticsearch (>=8,<9)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "hnswlib (>=0.7.0,<0.8.0)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "lark (>=1.1.5,<2.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "opensearch-py (>=2.0.0,<3.0.0)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "protobuf (==3.19)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0)", "redis (>=4,<5)", "sentence-transformers (>=2,<3)", "spacy (>=3,<4)", "tensorflow-text (>=2.11.0,<3.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] azure = ["azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "openai (>=0,<1)"] cohere = ["cohere (>=3,<4)"] embeddings = ["sentence-transformers (>=2,<3)"] +extended-testing = ["pdfminer-six (>=20221105,<20221106)", "pypdf (>=3.4.0,<4.0.0)"] +hnswlib = ["docarray (>=0.31.0,<0.32.0)", "hnswlib (>=0.7.0,<0.8.0)", "protobuf (==3.19)"] +in-memory-store = ["docarray (>=0.31.0,<0.32.0)"] llms = ["anthropic (>=0.2.6,<0.3.0)", "cohere (>=3,<4)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "torch (>=1,<3)", "transformers (>=4,<5)"] openai = ["openai (>=0,<1)"] qdrant = ["qdrant-client (>=1.1.2,<2.0.0)"] @@ -1835,6 +2048,18 @@ files = [ {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, ] +[[package]] +name = "more-itertools" +version = "9.1.0" +description = "More routines for operating on iterables, beyond itertools" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "more-itertools-9.1.0.tar.gz", hash = "sha256:cabaa341ad0389ea83c17a94566a53ae4c9d07349861ecb14dc6d0345cf9ac5d"}, + {file = "more_itertools-9.1.0-py3-none-any.whl", hash = "sha256:d2bc7f02446e86a68911e58ded76d6561eea00cddfb2a91e7019bbb586c799f3"}, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2231,6 +2456,20 @@ sql-other = ["SQLAlchemy (>=1.4.16)"] test = ["hypothesis (>=6.34.2)", "pytest (>=7.0.0)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.6.3)"] +[[package]] +name = "pgvector" +version = "0.1.7" +description = "pgvector support for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pgvector-0.1.7-py2.py3-none-any.whl", hash = "sha256:b0da0289959372f916b96c1da7c57437725c7aa33fa0c75b4a53c3677369bdd5"}, +] + +[package.dependencies] +numpy = "*" + [[package]] name = "pillow" version = "9.5.0" @@ -2337,6 +2576,21 @@ urllib3 = ">=1.21.1" [package.extras] grpc = ["googleapis-common-protos (>=1.53.0)", "grpc-gateway-protoc-gen-openapiv2 (==0.1.0)", "grpcio (>=1.44.0)", "lz4 (>=3.1.3)", "protobuf (==3.19.3)"] +[[package]] +name = "pkginfo" +version = "1.9.6" +description = "Query metadata 
from sdists / bdists / installed packages." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pkginfo-1.9.6-py3-none-any.whl", hash = "sha256:4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546"}, + {file = "pkginfo-1.9.6.tar.gz", hash = "sha256:8fd5896e8718a4372f0ea9cc9d96f6417c9b986e23a4d116dda26b62cc29d046"}, +] + +[package.extras] +testing = ["pytest", "pytest-cov"] + [[package]] name = "pluggy" version = "1.0.0" @@ -2373,6 +2627,24 @@ docs = ["sphinx (>=1.7.1)"] redis = ["redis"] tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)"] +[[package]] +name = "postgrest" +version = "0.10.6" +description = "PostgREST client for Python. This library provides an ORM interface to PostgREST." +category = "main" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "postgrest-0.10.6-py3-none-any.whl", hash = "sha256:7302068ce3cd80e761e35d6d665d3e65632442488258e3299c008013119d7fe6"}, + {file = "postgrest-0.10.6.tar.gz", hash = "sha256:ee145d53ea8642a16fa7f42848443baa08ae1e6f41e071865f5f54bcb3b24aa3"}, +] + +[package.dependencies] +deprecation = ">=2.1.0,<3.0.0" +httpx = ">=0.23.0,<0.24.0" +pydantic = ">=1.9.0,<2.0.0" +strenum = ">=0.4.9,<0.5.0" + [[package]] name = "posthog" version = "3.0.1" @@ -2420,6 +2692,44 @@ files = [ {file = "protobuf-4.23.0.tar.gz", hash = "sha256:5f1eba1da2a2f3f7df469fccddef3cc060b8a16cfe3cc65961ad36b4dbcf59c5"}, ] +[[package]] +name = "psycopg2" +version = "2.9.6" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "psycopg2-2.9.6-cp310-cp310-win32.whl", hash = "sha256:f7a7a5ee78ba7dc74265ba69e010ae89dae635eea0e97b055fb641a01a31d2b1"}, + {file = "psycopg2-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:f75001a1cbbe523e00b0ef896a5a1ada2da93ccd752b7636db5a99bc57c44494"}, + {file = "psycopg2-2.9.6-cp311-cp311-win32.whl", hash = "sha256:53f4ad0a3988f983e9b49a5d9765d663bbe84f508ed655affdb810af9d0972ad"}, + {file = "psycopg2-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b81fcb9ecfc584f661b71c889edeae70bae30d3ef74fa0ca388ecda50b1222b7"}, + {file = "psycopg2-2.9.6-cp36-cp36m-win32.whl", hash = "sha256:11aca705ec888e4f4cea97289a0bf0f22a067a32614f6ef64fcf7b8bfbc53744"}, + {file = "psycopg2-2.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:36c941a767341d11549c0fbdbb2bf5be2eda4caf87f65dfcd7d146828bd27f39"}, + {file = "psycopg2-2.9.6-cp37-cp37m-win32.whl", hash = "sha256:869776630c04f335d4124f120b7fb377fe44b0a7645ab3c34b4ba42516951889"}, + {file = "psycopg2-2.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:a8ad4a47f42aa6aec8d061fdae21eaed8d864d4bb0f0cade5ad32ca16fcd6258"}, + {file = "psycopg2-2.9.6-cp38-cp38-win32.whl", hash = "sha256:2362ee4d07ac85ff0ad93e22c693d0f37ff63e28f0615a16b6635a645f4b9214"}, + {file = "psycopg2-2.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:d24ead3716a7d093b90b27b3d73459fe8cd90fd7065cf43b3c40966221d8c394"}, + {file = "psycopg2-2.9.6-cp39-cp39-win32.whl", hash = "sha256:1861a53a6a0fd248e42ea37c957d36950da00266378746588eab4f4b5649e95f"}, + {file = "psycopg2-2.9.6-cp39-cp39-win_amd64.whl", hash = "sha256:ded2faa2e6dfb430af7713d87ab4abbfc764d8d7fb73eafe96a24155f906ebf5"}, + {file = "psycopg2-2.9.6.tar.gz", hash = "sha256:f15158418fd826831b28585e2ab48ed8df2d0d98f502a2b4fe619e7d5ca29011"}, +] + +[[package]] +name = "psycopg2cffi" +version = "2.9.0" +description = ".. 
image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, +] + +[package.dependencies] +cffi = ">=1.0" +six = "*" + [[package]] name = "pycparser" version = "2.21" @@ -2528,6 +2838,21 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name = "pygments" +version = "2.15.1" +description = "Pygments is a syntax highlighting package written in Python." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Pygments-2.15.1-py3-none-any.whl", hash = "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"}, + {file = "Pygments-2.15.1.tar.gz", hash = "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c"}, +] + +[package.extras] +plugins = ["importlib-metadata"] + [[package]] name = "pyjwt" version = "2.7.0" @@ -2678,6 +3003,26 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-gitlab" +version = "3.14.0" +description = "Interact with GitLab API" +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "python-gitlab-3.14.0.tar.gz", hash = "sha256:ef3b8960faeee9880f82b0872d807e3fab94ace12b0d2a8418a97875c8812d3c"}, + {file = "python_gitlab-3.14.0-py3-none-any.whl", hash = "sha256:da614c014c6860147783dde8c216218d8fc6bd83a8bd2e3929dcdf11b211aa58"}, +] + +[package.dependencies] +requests = ">=2.25.0" +requests-toolbelt = ">=0.10.1" + +[package.extras] +autocompletion = ["argcomplete (>=1.10.0,<3)"] +yaml = ["PyYaml (>=5.2)"] + [[package]] name = "python-multipart" version = "0.0.6" @@ -2709,6 +3054,38 @@ lxml = ">=3.1.0" Pillow = ">=3.3.2" XlsxWriter = ">=0.5.7" +[[package]] +name = "python-semantic-release" +version = "7.33.2" +description = "Automatic Semantic Versioning for Python projects" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "python-semantic-release-7.33.2.tar.gz", hash = "sha256:c23b4bb746e9ddbe1ba7497c48f7d81403e67a14ceb37928ef667c1fbee5e324"}, + {file = "python_semantic_release-7.33.2-py3-none-any.whl", hash = "sha256:9e4990cc0a4dc37482ac5ec7fe6f70f71681228f68f0fa39370415701fdcf632"}, +] + +[package.dependencies] +click = ">=7,<9" +click-log = ">=0.3,<1" +dotty-dict = ">=1.3.0,<2" +gitpython = ">=3.0.8,<4" +invoke = ">=1.4.1,<2" +packaging = "*" +python-gitlab = ">=2,<4" +requests = ">=2.25,<3" +semver = ">=2.10,<3" +tomlkit = ">=0.10,<1.0" +twine = ">=3,<4" +wheel = "*" + +[package.extras] +dev = ["black", "isort", "tox"] +docs = ["Jinja2 (==3.0.3)", "Sphinx (==1.3.6)"] +mypy = ["mypy", "types-requests"] +test = ["coverage (>=5,<6)", "mock (==1.3.0)", "pytest (>=7,<8)", "pytest-mock (>=2,<3)", "pytest-xdist (>=1,<2)", "responses (==0.13.3)"] + [[package]] name = "pytz" version = "2023.3" @@ -2745,6 +3122,18 @@ files = [ {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, ] +[[package]] +name = "pywin32-ctypes" +version = "0.2.0" +description = "" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"}, + {file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"}, +] + 
[[package]] name = "pyyaml" version = "6.0" @@ -2797,14 +3186,14 @@ files = [ [[package]] name = "qdrant-client" -version = "1.1.6" +version = "1.1.7" description = "Client library for the Qdrant vector search engine" category = "main" optional = false python-versions = ">=3.7,<3.12" files = [ - {file = "qdrant_client-1.1.6-py3-none-any.whl", hash = "sha256:757e8d65fb6d4305fe6dbb4b087bf62ea3f01c28652f81592800564748a73545"}, - {file = "qdrant_client-1.1.6.tar.gz", hash = "sha256:4b1be451e27e6c8058c565bcf92e5308483b79395f826343477ed376bf601cd3"}, + {file = "qdrant_client-1.1.7-py3-none-any.whl", hash = "sha256:4f5d883660b8193840d8982919ab813a0470ace9a7ff46ee730f909841be5319"}, + {file = "qdrant_client-1.1.7.tar.gz", hash = "sha256:686d86934bec2ebb70676fc0650c9a44a9e552e0149124ca5a22ee8533879deb"}, ] [package.dependencies] @@ -2812,24 +3201,62 @@ grpcio = ">=1.41.0" grpcio-tools = ">=1.41.0" httpx = {version = ">=0.14.0", extras = ["http2"]} numpy = {version = ">=1.21", markers = "python_version >= \"3.8\""} +portalocker = ">=2.7.0,<3.0.0" pydantic = ">=1.8,<2.0" typing-extensions = ">=4.0.0,<5.0.0" urllib3 = ">=1.26.14,<2.0.0" +[[package]] +name = "readme-renderer" +version = "37.3" +description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "readme_renderer-37.3-py3-none-any.whl", hash = "sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343"}, + {file = "readme_renderer-37.3.tar.gz", hash = "sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273"}, +] + +[package.dependencies] +bleach = ">=2.1.0" +docutils = ">=0.13.1" +Pygments = ">=2.5.1" + +[package.extras] +md = ["cmarkgfm (>=0.8.0)"] + +[[package]] +name = "realtime" +version = "1.0.0" +description = "" +category = "main" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "realtime-1.0.0-py3-none-any.whl", hash = "sha256:ceab9e292211ab08b5792ac52b3fa25398440031d5b369bd5799b8125056e2d8"}, + {file = "realtime-1.0.0.tar.gz", hash = "sha256:14e540c4a0cc2736ae83e0cbd7efbbfb8b736df1681df2b9141556cb4848502d"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1,<3.0.0" +typing-extensions = ">=4.2.0,<5.0.0" +websockets = ">=10.3,<11.0" + [[package]] name = "redis" -version = "4.5.1" +version = "4.5.4" description = "Python client for Redis database and key-value store" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "redis-4.5.1-py3-none-any.whl", hash = "sha256:5deb072d26e67d2be1712603bfb7947ec3431fb0eec9c578994052e33035af6d"}, - {file = "redis-4.5.1.tar.gz", hash = "sha256:1eec3741cda408d3a5f84b78d089c8b8d895f21b3b050988351e925faf202864"}, + {file = "redis-4.5.4-py3-none-any.whl", hash = "sha256:2c19e6767c474f2e85167909061d525ed65bea9301c0770bb151e041b7ac89a2"}, + {file = "redis-4.5.4.tar.gz", hash = "sha256:73ec35da4da267d6847e47f68730fdd5f62e2ca69e3ef5885c6a78a9374c3893"}, ] [package.dependencies] -async-timeout = ">=4.0.2" +async-timeout = {version = ">=4.0.2", markers = "python_version <= \"3.11.2\""} [package.extras] hiredis = ["hiredis (>=1.0.0)"] @@ -2955,6 +3382,21 @@ urllib3 = ">=1.21.1,<1.27" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file 
= "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + [[package]] name = "rfc3986" version = "1.5.0" @@ -3055,6 +3497,34 @@ dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"] doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"] test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +[[package]] +name = "secretstorage" +version = "3.3.3" +description = "Python bindings to FreeDesktop.org Secret Service API" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"}, + {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"}, +] + +[package.dependencies] +cryptography = ">=2.0" +jeepney = ">=0.6" + +[[package]] +name = "semver" +version = "2.13.0" +description = "Python helper for Semantic Versioning (http://semver.org/)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"}, + {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"}, +] + [[package]] name = "sentence-transformers" version = "2.2.2" @@ -3162,6 +3632,18 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smmap" +version = "5.0.0" +description = "A pure Python implementation of a sliding window memory map manager" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, + {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, +] + [[package]] name = "sniffio" version = "1.3.0" @@ -3176,53 +3658,53 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.12" +version = "2.0.13" description = "Database Abstraction Library" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:10f1ff0ebe21d2cea89ead231ba3ecf75678463ab85f19ce2ce91207620737f3"}, - {file = "SQLAlchemy-2.0.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:978bee4ecbcdadf087220618409fb9be9509458df479528b70308f0599c7c519"}, - {file = "SQLAlchemy-2.0.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53b2c8adbcbb59732fb21a024aaa261983655845d86e3fc26a5676cec0ebaa09"}, - {file = "SQLAlchemy-2.0.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91f4b1bdc987ef85fe3a0ce5d26ac72ff8f60207b08272aa2a65494836391d69"}, - {file = "SQLAlchemy-2.0.12-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dfd6385b662aea83e63dd4db5fe116eb11914022deb1745f0b57fa8470c18ffe"}, - {file = "SQLAlchemy-2.0.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:5e9d390727c11b9a7e583bf6770de36895c0936bddb98ae93ae99282e6428d5f"}, - {file = "SQLAlchemy-2.0.12-cp310-cp310-win32.whl", hash = "sha256:a4709457f1c317e347051498b91fa2b86c4bcdebf93c84e6d121a4fc8a397307"}, - {file = "SQLAlchemy-2.0.12-cp310-cp310-win_amd64.whl", hash = "sha256:f0843132168b44ca33c5e5a2046c954775dde8c580ce27f5cf2e134d0d9919e4"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:32762dba51b663609757f861584a722093487f53737e76474cc6e190904dc31b"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d709f43caee115b03b707b8cbbcb8b303045dd7cdc825b6d29857d71f3425ae"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fe98e9d26778d7711ceee2c671741b4f54c74677668481d733d6f70747d7690"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a3101252f3de9a18561c1fb0a68b1ee465485990aba458d4510f214bd5a582c"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b1fa0ffc378a7061c452cb4a1f804fad1b3b8aa8d0552725531d27941b2e3ed"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c5268ec05c21e2ecf5bca09314bcaadfec01f02163088cd602db4379862958dd"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-win32.whl", hash = "sha256:77a06b0983faf9aa48ee6219d41ade39dee16ce90857cc181dbcf6918acd234d"}, - {file = "SQLAlchemy-2.0.12-cp311-cp311-win_amd64.whl", hash = "sha256:a022c588c0f413f8cddf9fcc597dbf317efeac4186d8bff9aa7f3219258348b0"}, - {file = "SQLAlchemy-2.0.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b6ceca432ce88ad12aab5b5896c343a1993c90b325d9193dcd055e73e18a0439"}, - {file = "SQLAlchemy-2.0.12-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e5501c78b5ab917f0f0f75ce7f0018f683a0a76e95f30e6561bf61c9ff69d43"}, - {file = "SQLAlchemy-2.0.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67efd00ce7f428a446ce012673c03c63c5abb5dec3f33750087b8bdc173bf0"}, - {file = "SQLAlchemy-2.0.12-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1fac17c866111283cbcdb7024d646abb71fdd95f3ce975cf3710258bc55742fd"}, - {file = "SQLAlchemy-2.0.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f30c5608c64fc9c1fa9a16277eb4784f782362566fe40ff8d283358c8f2c5fe0"}, - {file = "SQLAlchemy-2.0.12-cp37-cp37m-win32.whl", hash = "sha256:85b0efe1c71459ba435a6593f54a0e39334b16ba383e8010fdb9d0127ca51ba8"}, - {file = "SQLAlchemy-2.0.12-cp37-cp37m-win_amd64.whl", hash = "sha256:b76c2fde827522e21922418325c1b95c2d795cdecfb4bc261e4d37965199ee7f"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aec5fb36b53125554ecc2285526eb5cc31b21f6cb059993c1c5ca831959de052"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4ad525b9dd17b478a2ed8580d7f2bc46b0f5889153c6b1c099729583e395b4b9"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9796d5c13b2b7f05084d0ce52528cf919f9bde9e0f10672a6393a4490415695"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e1d50592cb24d1947c374c666add65ded7c181ec98a89ed17abbe9b8b2e2ff4"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bf83700faa9642388fbd3167db3f6cbb2e88cc8367b8c22204f3f408ee782d25"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:297b752d4f30350b64175bbbd57dc94c061a35f5d1dba088d0a367dbbebabc94"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-win32.whl", hash = "sha256:369f6564e68a9c60f0b9dde121def491e651a4ba8dcdd652a93f1cd5977cd85c"}, - {file = "SQLAlchemy-2.0.12-cp38-cp38-win_amd64.whl", hash = "sha256:7eb25b981cbc9e7df9f56ad7ec4c6d77323090ca4b7147fcdc09d66535377759"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f6ebadefc4331dda83c22519e1ea1e61104df6eb38abbb80ab91b0a8527a5c19"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3745dee26a7ee012598577ad3b8f6e6cd50a49b2afa0cde9db668da6bf2c2319"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09205893a84b6bedae0453d3f384f5d2a6499b6e45ad977549894cdcd85d8f1c"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8aad66215a3817a7a1d535769773333250de2653c89b53f7e2d42b677d398027"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e495ad05a13171fbb5d72fe5993469c8bceac42bcf6b8f9f117a518ee7fbc353"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:03206576ca53f55b9de6e890273e498f4b2e6e687a9db9859bdcd21df5a63e53"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-win32.whl", hash = "sha256:87b2c2d13c3d1384859b60eabb3139e169ce68ada1d2963dbd0c7af797f16efe"}, - {file = "SQLAlchemy-2.0.12-cp39-cp39-win_amd64.whl", hash = "sha256:3c053c3f4c4e45d4c8b27977647566c140d6de3f61a4e2acb92ea24cf9911c7f"}, - {file = "SQLAlchemy-2.0.12-py3-none-any.whl", hash = "sha256:e752c34f7a2057ebe82c856698b9f277c633d4aad006bddf7af74598567c8931"}, - {file = "SQLAlchemy-2.0.12.tar.gz", hash = "sha256:bddfc5bd1dee5db0fddc9dab26f800c283f3243e7281bbf107200fed30125f9c"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7ad24c85f2a1caf0cd1ae8c2fdb668777a51a02246d9039420f94bd7dbfd37ed"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db24d2738add6db19d66ca820479d2f8f96d3f5a13c223f27fa28dd2f268a4bd"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72746ec17a7d9c5acf2c57a6e6190ceba3dad7127cd85bb17f24e90acc0e8e3f"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:755f653d693f9b8f4286d987aec0d4279821bf8d179a9de8e8a5c685e77e57d6"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e0d20f27edfd6f35b388da2bdcd7769e4ffa374fef8994980ced26eb287e033a"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:37de4010f53f452e94e5ed6684480432cfe6a7a8914307ef819cd028b05b98d5"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-win32.whl", hash = "sha256:31f72bb300eed7bfdb373c7c046121d84fa0ae6f383089db9505ff553ac27cef"}, + {file = "SQLAlchemy-2.0.13-cp310-cp310-win_amd64.whl", hash = "sha256:ec2f525273528425ed2f51861b7b88955160cb95dddb17af0914077040aff4a5"}, + {file = "SQLAlchemy-2.0.13-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2424a84f131901fbb20a99844d47b38b517174c6e964c8efb15ea6bb9ced8c2b"}, + {file = "SQLAlchemy-2.0.13-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f9832815257969b3ca9bf0501351e4c02c8d60cbd3ec9f9070d5b0f8852900e"}, + {file = "SQLAlchemy-2.0.13-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a30e4db983faa5145e00ef6eaf894a2d503b3221dbf40a595f3011930d3d0bac"}, + {file = 
"SQLAlchemy-2.0.13-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f717944aee40e9f48776cf85b523bb376aa2d9255a268d6d643c57ab387e7264"}, + {file = "SQLAlchemy-2.0.13-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9119795d2405eb23bf7e6707e228fe38124df029494c1b3576459aa3202ea432"}, + {file = "SQLAlchemy-2.0.13-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2ad9688debf1f0ae9c6e0706a4e2d33b1a01281317cee9bd1d7eef8020c5baac"}, + {file = "SQLAlchemy-2.0.13-cp311-cp311-win32.whl", hash = "sha256:c61b89803a87a3b2a394089a7dadb79a6c64c89f2e8930cc187fec43b319f8d2"}, + {file = "SQLAlchemy-2.0.13-cp311-cp311-win_amd64.whl", hash = "sha256:0aa2cbde85a6eab9263ab480f19e8882d022d30ebcdc14d69e6a8d7c07b0a871"}, + {file = "SQLAlchemy-2.0.13-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9ad883ac4f5225999747f0849643c4d0ec809d9ffe0ddc81a81dd3e68d0af463"}, + {file = "SQLAlchemy-2.0.13-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e481e54db8cec1457ee7c05f6d2329e3298a304a70d3b5e2e82e77170850b385"}, + {file = "SQLAlchemy-2.0.13-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e08e3831671008888bad5d160d757ef35ce34dbb73b78c3998d16aa1334c97"}, + {file = "SQLAlchemy-2.0.13-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f234ba3bb339ad17803009c8251f5ee65dcf283a380817fe486823b08b26383d"}, + {file = "SQLAlchemy-2.0.13-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:375b7ba88f261dbd79d044f20cbcd919d88befb63f26af9d084614f10cdf97a6"}, + {file = "SQLAlchemy-2.0.13-cp37-cp37m-win32.whl", hash = "sha256:9136d596111c742d061c0f99bab95c5370016c4101a32e72c2b634ad5e0757e6"}, + {file = "SQLAlchemy-2.0.13-cp37-cp37m-win_amd64.whl", hash = "sha256:7612a7366a0855a04430363fb4ab392dc6818aaece0b2e325ff30ee77af9b21f"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:49c138856035cb97f0053e5e57ba90ec936b28a0b8b0020d44965c7b0c0bf03a"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a5e9e78332a5d841422b88b8c490dfd7f761e64b3430249b66c05d02f72ceab0"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd0febae872a4042da44e972c070f0fd49a85a0a7727ab6b85425f74348be14e"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:566a0ac347cf4632f551e7b28bbd0d215af82e6ffaa2556f565a3b6b51dc3f81"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e5e5dc300a0ca8755ada1569f5caccfcdca28607dfb98b86a54996b288a8ebd3"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a25b4c4fdd633501233924f873e6f6cd8970732859ecfe4ecfb60635881f70be"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-win32.whl", hash = "sha256:6777673d346071451bf7cccf8d0499024f1bd6a835fc90b4fe7af50373d92ce6"}, + {file = "SQLAlchemy-2.0.13-cp38-cp38-win_amd64.whl", hash = "sha256:2f0a355264af0952570f18457102984e1f79510f856e5e0ae652e63316d1ca23"}, + {file = "SQLAlchemy-2.0.13-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d93ebbff3dcf05274843ad8cf650b48ee634626e752c5d73614e5ec9df45f0ce"}, + {file = "SQLAlchemy-2.0.13-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fec56c7d1b6a22c8f01557de3975d962ee40270b81b60d1cfdadf2a105d10e84"}, + {file = "SQLAlchemy-2.0.13-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0eb14a386a5b610305bec6639b35540b47f408b0a59f75999199aed5b3d40079"}, + {file = 
"SQLAlchemy-2.0.13-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2f3b5236079bc3e318a92bab2cc3f669cc32127075ab03ff61cacbae1c392b8"}, + {file = "SQLAlchemy-2.0.13-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bf1aae95e80acea02a0a622e1c12d3fefc52ffd0fe7bda70a30d070373fbb6c3"}, + {file = "SQLAlchemy-2.0.13-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cdf80359b641185ae7e580afb9f88cf560298f309a38182972091165bfe1225d"}, + {file = "SQLAlchemy-2.0.13-cp39-cp39-win32.whl", hash = "sha256:f463598f9e51ccc04f0fe08500f9a0c3251a7086765350be418598b753b5561d"}, + {file = "SQLAlchemy-2.0.13-cp39-cp39-win_amd64.whl", hash = "sha256:881cc388dded44ae6e17a1666364b98bd76bcdc71b869014ae725f06ba298e0e"}, + {file = "SQLAlchemy-2.0.13-py3-none-any.whl", hash = "sha256:0d6979c9707f8b82366ba34b38b5a6fe32f75766b2e901f9820e271e95384070"}, + {file = "SQLAlchemy-2.0.13.tar.gz", hash = "sha256:8d97b37b4e60073c38bcf94e289e3be09ef9be870de88d163f16e08f2b9ded1a"}, ] [package.dependencies] @@ -3270,16 +3752,86 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] +[[package]] +name = "storage3" +version = "0.5.2" +description = "Supabase Storage client for Python." +category = "main" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "storage3-0.5.2-py3-none-any.whl", hash = "sha256:3aaba8cebf89eef6b5fc48739b8c8c8539461f2eed9ea1dc4c763dea10c6d009"}, + {file = "storage3-0.5.2.tar.gz", hash = "sha256:e9932fca869a8f9cdab9a20e5249439928cfe2d07c4524141b15fef1882a7f61"}, +] + +[package.dependencies] +httpx = ">=0.23,<0.24" +python-dateutil = ">=2.8.2,<3.0.0" +typing-extensions = ">=4.2.0,<5.0.0" + +[[package]] +name = "strenum" +version = "0.4.10" +description = "An Enum that inherits from str." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "StrEnum-0.4.10-py3-none-any.whl", hash = "sha256:aebf04bba8e5af435937c452d69a86798b6f8d5ca5f20ba18561dbfad571ccdd"}, + {file = "StrEnum-0.4.10.tar.gz", hash = "sha256:898cc0ebb5054ee07400341ac1d75fdfee489d76d6df3fbc1c2eaf95971e3916"}, +] + +[package.extras] +docs = ["myst-parser[linkify]", "sphinx", "sphinx-rtd-theme"] +release = ["twine"] +test = ["pylint", "pytest", "pytest-black", "pytest-cov", "pytest-pylint"] + +[[package]] +name = "supabase" +version = "1.0.3" +description = "Supabase client for Python." 
+category = "main" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "supabase-1.0.3-py3-none-any.whl", hash = "sha256:2418113b7f503522d33fafd442e587356636bad6cb803f7e406e614acf2611d7"}, + {file = "supabase-1.0.3.tar.gz", hash = "sha256:c6eac0144b4236a61ccc72024a8e88d8f08979e47ea635307afae7fb4fc24bc6"}, +] + +[package.dependencies] +gotrue = ">=1.0.1,<2.0.0" +httpx = ">=0.23.0,<0.24.0" +postgrest = ">=0.10.6,<0.11.0" +python-semantic-release = "7.33.2" +realtime = ">=1.0.0,<2.0.0" +storage3 = ">=0.5.2,<0.6.0" +supafunc = ">=0.2.2,<0.3.0" + +[[package]] +name = "supafunc" +version = "0.2.2" +description = "Library for Supabase Functions" +category = "main" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "supafunc-0.2.2-py3-none-any.whl", hash = "sha256:a292812532cca05afc08d2cc040eea5bd79a8909e46051630620b67508070795"}, + {file = "supafunc-0.2.2.tar.gz", hash = "sha256:84f1f8d47297b0c8b712f1d8e20843406c025a203bba00cb7216e2163f295c24"}, +] + +[package.dependencies] +httpx = ">=0.23.0,<0.24.0" + [[package]] name = "sympy" -version = "1.11.1" +version = "1.12" description = "Computer algebra system (CAS) in Python" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "sympy-1.11.1-py3-none-any.whl", hash = "sha256:938f984ee2b1e8eae8a07b884c8b7a1146010040fccddc6539c54f401c8f6fcf"}, - {file = "sympy-1.11.1.tar.gz", hash = "sha256:e32380dce63cb7c0108ed525570092fd45168bdae2faa17e528221ef72e88658"}, + {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, + {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, ] [package.dependencies] @@ -3415,6 +3967,18 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tomlkit" +version = "0.11.8" +description = "Style preserving TOML library" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomlkit-0.11.8-py3-none-any.whl", hash = "sha256:8c726c4c202bdb148667835f68d68780b9a003a9ec34167b6c673b38eff2a171"}, + {file = "tomlkit-0.11.8.tar.gz", hash = "sha256:9330fc7faa1db67b541b28e62018c17d20be733177d290a13b24c62d1614e0c3"}, +] + [[package]] name = "torch" version = "2.0.1" @@ -3517,19 +4081,19 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "4.28.1" +version = "4.29.1" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" category = "main" optional = false python-versions = ">=3.7.0" files = [ - {file = "transformers-4.28.1-py3-none-any.whl", hash = "sha256:f30a006220d0475789ac0e7c874f51bf5143956797616d89975b637883ce0be6"}, - {file = "transformers-4.28.1.tar.gz", hash = "sha256:7334f8730cff7ac31d9ba5c12f2113fcb7a7a5b61eeb5dbbdb162117c3aaa2d1"}, + {file = "transformers-4.29.1-py3-none-any.whl", hash = "sha256:75f851f2420c26410edbdf4a2a1a5b434ab2b96aea36eb5931d06cc3b2e7b509"}, + {file = "transformers-4.29.1.tar.gz", hash = "sha256:3dc9cd198918e140468edbf37d7edf3b7a75633655ce0771ce323bbf8c118c4d"}, ] [package.dependencies] filelock = "*" -huggingface-hub = ">=0.11.0,<1.0" +huggingface-hub = ">=0.14.1,<1.0" numpy = ">=1.17" packaging = ">=20.0" pyyaml = ">=5.1" @@ -3539,20 +4103,21 @@ tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" tqdm = ">=4.27" [package.extras] -accelerate = ["accelerate (>=0.10.0)"] -all = ["Pillow", "accelerate (>=0.10.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", 
"decord (==0.6.0)", "flax (>=0.4.1)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] -audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +accelerate = ["accelerate (>=0.19.0)"] +agents = ["Pillow", "accelerate (>=0.19.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.9,!=1.12.0)"] +all = ["Pillow", "accelerate (>=0.19.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.6.9)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "numba (<0.57.0)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)"] codecarbon = ["codecarbon (==1.2.0)"] -deepspeed = ["accelerate (>=0.10.0)", "deepspeed (>=0.8.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.10.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.8.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf (<=3.20.2)", "psutil", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.10.0)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] -dev-tensorflow = ["GitPython (<3.1.19)", "Pillow", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", 
"hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)"] -dev-torch = ["GitPython (<3.1.19)", "Pillow", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] -docs = ["Pillow", "accelerate (>=0.10.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1)", "hf-doc-builder", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +deepspeed = ["accelerate (>=0.19.0)", "deepspeed (>=0.8.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.19.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.8.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf (<=3.20.2)", "psutil", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.19.0)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.6.9)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "numba (<0.57.0)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", 
"psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "numba (<0.57.0)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.19.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "numba (<0.57.0)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +docs = ["Pillow", "accelerate (>=0.19.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.6.9)", "hf-doc-builder", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "numba (<0.57.0)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] docs-specific = ["hf-doc-builder"] fairscale = ["fairscale (>0.3)"] -flax = ["flax (>=0.4.1)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "optax (>=0.0.8)"] -flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +flax = ["flax (>=0.4.1,<=0.6.9)", "jax 
(>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "optax (>=0.0.8,<=0.1.4)"] +flax-speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)"] ftfy = ["ftfy"] integrations = ["optuna", "ray[tune]", "sigopt"] ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] @@ -3561,7 +4126,7 @@ natten = ["natten (>=0.14.6)"] onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] optuna = ["optuna"] -quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)"] +quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"] ray = ["ray[tune]"] retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] sagemaker = ["sagemaker (>=2.31.0)"] @@ -3569,20 +4134,44 @@ sentencepiece = ["protobuf (<=3.20.2)", "sentencepiece (>=0.1.91,!=0.1.92)"] serving = ["fastapi", "pydantic", "starlette", "uvicorn"] sigopt = ["sigopt"] sklearn = ["scikit-learn"] -speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf (<=3.20.2)", "psutil", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "timeout-decorator"] tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx"] tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx"] -tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +tf-speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)"] timm = ["timm"] tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] -torch = ["torch (>=1.9,!=1.12.0)"] -torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch = ["accelerate (>=0.19.0)", "torch (>=1.9,!=1.12.0)"] +torch-speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.11.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf (<=3.20.2)", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "tqdm (>=4.27)"] +torchhub = ["filelock", "huggingface-hub (>=0.14.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf (<=3.20.2)", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "tqdm (>=4.27)"] video = ["av (==9.2.0)", "decord (==0.6.0)"] vision = ["Pillow"] +[[package]] +name = "twine" +version = "3.8.0" +description = "Collection of utilities for publishing packages on PyPI" 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"}, + {file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"}, +] + +[package.dependencies] +colorama = ">=0.4.3" +importlib-metadata = ">=3.6" +keyring = ">=15.1" +pkginfo = ">=1.8.1" +readme-renderer = ">=21.0" +requests = ">=2.20" +requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0" +rfc3986 = ">=1.4.0" +tqdm = ">=4.14" +urllib3 = ">=1.26.0" + [[package]] name = "typing-extensions" version = "4.5.0" @@ -3840,14 +4429,14 @@ anyio = ">=3.0.0" [[package]] name = "weaviate-client" -version = "3.17.1" +version = "3.18.0" description = "A python native weaviate client" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "weaviate-client-3.17.1.tar.gz", hash = "sha256:04277030396a0e63e73b994a185c705f07f948254d27c0a3774c60b4795c37ab"}, - {file = "weaviate_client-3.17.1-py3-none-any.whl", hash = "sha256:0c86f4d5fcb155efd0888515c8caa20364241c0df01dead361ce0c023dbc5da9"}, + {file = "weaviate-client-3.18.0.tar.gz", hash = "sha256:423a526518a32505c5293328e5f252e6cbbf20e4b3124733f70d10fc0d6823c9"}, + {file = "weaviate_client-3.18.0-py3-none-any.whl", hash = "sha256:42b324286a4b4436317e5d2c6ba48c07da6cf01518efdd47ee097e7a8cc7584c"}, ] [package.dependencies] @@ -3859,86 +4448,112 @@ validators = ">=0.18.2,<=0.21.0" [package.extras] grpc = ["grpcio", "grpcio-tools"] +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] + [[package]] name = "websockets" -version = "11.0.3" +version = "10.4" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"}, - {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"}, - {file = "websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d27a4832cc1a0ee07cdcf2b0629a8a72db73f4cf6de6f0904f6661227f256f"}, - {file = "websockets-11.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffd7dcaf744f25f82190856bc26ed81721508fc5cbf2a330751e135ff1283564"}, - {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7622a89d696fc87af8e8d280d9b421db5133ef5b29d3f7a1ce9f1a7bf7fcfa11"}, - {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bceab846bac555aff6427d060f2fcfff71042dba6f5fca7dc4f75cac815e57ca"}, - {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:54c6e5b3d3a8936a4ab6870d46bdd6ec500ad62bde9e44462c32d18f1e9a8e54"}, - {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:41f696ba95cd92dc047e46b41b26dd24518384749ed0d99bea0a941ca87404c4"}, - {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:86d2a77fd490ae3ff6fae1c6ceaecad063d3cc2320b44377efdde79880e11526"}, - {file = "websockets-11.0.3-cp310-cp310-win32.whl", hash = "sha256:2d903ad4419f5b472de90cd2d40384573b25da71e33519a67797de17ef849b69"}, - {file = "websockets-11.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:1d2256283fa4b7f4c7d7d3e84dc2ece74d341bce57d5b9bf385df109c2a1a82f"}, - {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e848f46a58b9fcf3d06061d17be388caf70ea5b8cc3466251963c8345e13f7eb"}, - {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa5003845cdd21ac0dc6c9bf661c5beddd01116f6eb9eb3c8e272353d45b3288"}, - {file = "websockets-11.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b58cbf0697721120866820b89f93659abc31c1e876bf20d0b3d03cef14faf84d"}, - {file = "websockets-11.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:660e2d9068d2bedc0912af508f30bbeb505bbbf9774d98def45f68278cea20d3"}, - {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1f0524f203e3bd35149f12157438f406eff2e4fb30f71221c8a5eceb3617b6b"}, - {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:def07915168ac8f7853812cc593c71185a16216e9e4fa886358a17ed0fd9fcf6"}, - {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b30c6590146e53149f04e85a6e4fcae068df4289e31e4aee1fdf56a0dead8f97"}, - {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:619d9f06372b3a42bc29d0cd0354c9bb9fb39c2cbc1a9c5025b4538738dbffaf"}, - {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:01f5567d9cf6f502d655151645d4e8b72b453413d3819d2b6f1185abc23e82dd"}, - {file = "websockets-11.0.3-cp311-cp311-win32.whl", hash = "sha256:e1459677e5d12be8bbc7584c35b992eea142911a6236a3278b9b5ce3326f282c"}, - {file = "websockets-11.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:e7837cb169eca3b3ae94cc5787c4fed99eef74c0ab9506756eea335e0d6f3ed8"}, - {file = "websockets-11.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f59a3c656fef341a99e3d63189852be7084c0e54b75734cde571182c087b152"}, - {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2529338a6ff0eb0b50c7be33dc3d0e456381157a31eefc561771ee431134a97f"}, - {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34fd59a4ac42dff6d4681d8843217137f6bc85ed29722f2f7222bd619d15e95b"}, - {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:332d126167ddddec94597c2365537baf9ff62dfcc9db4266f263d455f2f031cb"}, - {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6505c1b31274723ccaf5f515c1824a4ad2f0d191cec942666b3d0f3aa4cb4007"}, - {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f467ba0050b7de85016b43f5a22b46383ef004c4f672148a8abf32bc999a87f0"}, - {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9d9acd80072abcc98bd2c86c3c9cd4ac2347b5a5a0cae7ed5c0ee5675f86d9af"}, - {file = "websockets-11.0.3-cp37-cp37m-win32.whl", hash = 
"sha256:e590228200fcfc7e9109509e4d9125eace2042fd52b595dd22bbc34bb282307f"}, - {file = "websockets-11.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b16fff62b45eccb9c7abb18e60e7e446998093cdcb50fed33134b9b6878836de"}, - {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fb06eea71a00a7af0ae6aefbb932fb8a7df3cb390cc217d51a9ad7343de1b8d0"}, - {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8a34e13a62a59c871064dfd8ffb150867e54291e46d4a7cf11d02c94a5275bae"}, - {file = "websockets-11.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4841ed00f1026dfbced6fca7d963c4e7043aa832648671b5138008dc5a8f6d99"}, - {file = "websockets-11.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a073fc9ab1c8aff37c99f11f1641e16da517770e31a37265d2755282a5d28aa"}, - {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68b977f21ce443d6d378dbd5ca38621755f2063d6fdb3335bda981d552cfff86"}, - {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a99a7a71631f0efe727c10edfba09ea6bee4166a6f9c19aafb6c0b5917d09c"}, - {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bee9fcb41db2a23bed96c6b6ead6489702c12334ea20a297aa095ce6d31370d0"}, - {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4b253869ea05a5a073ebfdcb5cb3b0266a57c3764cf6fe114e4cd90f4bfa5f5e"}, - {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1553cb82942b2a74dd9b15a018dce645d4e68674de2ca31ff13ebc2d9f283788"}, - {file = "websockets-11.0.3-cp38-cp38-win32.whl", hash = "sha256:f61bdb1df43dc9c131791fbc2355535f9024b9a04398d3bd0684fc16ab07df74"}, - {file = "websockets-11.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:03aae4edc0b1c68498f41a6772d80ac7c1e33c06c6ffa2ac1c27a07653e79d6f"}, - {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:777354ee16f02f643a4c7f2b3eff8027a33c9861edc691a2003531f5da4f6bc8"}, - {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c82f11964f010053e13daafdc7154ce7385ecc538989a354ccc7067fd7028fd"}, - {file = "websockets-11.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3580dd9c1ad0701169e4d6fc41e878ffe05e6bdcaf3c412f9d559389d0c9e016"}, - {file = "websockets-11.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f1a3f10f836fab6ca6efa97bb952300b20ae56b409414ca85bff2ad241d2a61"}, - {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df41b9bc27c2c25b486bae7cf42fccdc52ff181c8c387bfd026624a491c2671b"}, - {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279e5de4671e79a9ac877427f4ac4ce93751b8823f276b681d04b2156713b9dd"}, - {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1fdf26fa8a6a592f8f9235285b8affa72748dc12e964a5518c6c5e8f916716f7"}, - {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:69269f3a0b472e91125b503d3c0b3566bda26da0a3261c49f0027eb6075086d1"}, - {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:97b52894d948d2f6ea480171a27122d77af14ced35f62e5c892ca2fae9344311"}, - {file = "websockets-11.0.3-cp39-cp39-win32.whl", hash = 
"sha256:c7f3cb904cce8e1be667c7e6fef4516b98d1a6a0635a58a57528d577ac18a128"}, - {file = "websockets-11.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c792ea4eabc0159535608fc5658a74d1a81020eb35195dd63214dcf07556f67e"}, - {file = "websockets-11.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e58f2c36cc52d41f2659e4c0cbf7353e28c8c9e63e30d8c6d3494dc9fdedcf"}, - {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de36fe9c02995c7e6ae6efe2e205816f5f00c22fd1fbf343d4d18c3d5ceac2f5"}, - {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ac56b661e60edd453585f4bd68eb6a29ae25b5184fd5ba51e97652580458998"}, - {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e052b8467dd07d4943936009f46ae5ce7b908ddcac3fda581656b1b19c083d9b"}, - {file = "websockets-11.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:42cc5452a54a8e46a032521d7365da775823e21bfba2895fb7b77633cce031bb"}, - {file = "websockets-11.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e6316827e3e79b7b8e7d8e3b08f4e331af91a48e794d5d8b099928b6f0b85f20"}, - {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8531fdcad636d82c517b26a448dcfe62f720e1922b33c81ce695d0edb91eb931"}, - {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c114e8da9b475739dde229fd3bc6b05a6537a88a578358bc8eb29b4030fac9c9"}, - {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e063b1865974611313a3849d43f2c3f5368093691349cf3c7c8f8f75ad7cb280"}, - {file = "websockets-11.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:92b2065d642bf8c0a82d59e59053dd2fdde64d4ed44efe4870fa816c1232647b"}, - {file = "websockets-11.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0ee68fe502f9031f19d495dae2c268830df2760c0524cbac5d759921ba8c8e82"}, - {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcacf2c7a6c3a84e720d1bb2b543c675bf6c40e460300b628bab1b1efc7c034c"}, - {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b67c6f5e5a401fc56394f191f00f9b3811fe843ee93f4a70df3c389d1adf857d"}, - {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5023a4b6a5b183dc838808087033ec5df77580485fc533e7dab2567851b0a4"}, - {file = "websockets-11.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ed058398f55163a79bb9f06a90ef9ccc063b204bb346c4de78efc5d15abfe602"}, - {file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"}, - {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"}, + {file = "websockets-10.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d58804e996d7d2307173d56c297cf7bc132c52df27a3efaac5e8d43e36c21c48"}, + {file = "websockets-10.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc0b82d728fe21a0d03e65f81980abbbcb13b5387f733a1a870672c5be26edab"}, + {file = "websockets-10.4-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:ba089c499e1f4155d2a3c2a05d2878a3428cf321c848f2b5a45ce55f0d7d310c"}, + {file = "websockets-10.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33d69ca7612f0ddff3316b0c7b33ca180d464ecac2d115805c044bf0a3b0d032"}, + {file = "websockets-10.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62e627f6b6d4aed919a2052efc408da7a545c606268d5ab5bfab4432734b82b4"}, + {file = "websockets-10.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ea7b82bfcae927eeffc55d2ffa31665dc7fec7b8dc654506b8e5a518eb4d50"}, + {file = "websockets-10.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e0cb5cc6ece6ffa75baccfd5c02cffe776f3f5c8bf486811f9d3ea3453676ce8"}, + {file = "websockets-10.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae5e95cfb53ab1da62185e23b3130e11d64431179debac6dc3c6acf08760e9b1"}, + {file = "websockets-10.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7c584f366f46ba667cfa66020344886cf47088e79c9b9d39c84ce9ea98aaa331"}, + {file = "websockets-10.4-cp310-cp310-win32.whl", hash = "sha256:b029fb2032ae4724d8ae8d4f6b363f2cc39e4c7b12454df8df7f0f563ed3e61a"}, + {file = "websockets-10.4-cp310-cp310-win_amd64.whl", hash = "sha256:8dc96f64ae43dde92530775e9cb169979f414dcf5cff670455d81a6823b42089"}, + {file = "websockets-10.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:47a2964021f2110116cc1125b3e6d87ab5ad16dea161949e7244ec583b905bb4"}, + {file = "websockets-10.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e789376b52c295c4946403bd0efecf27ab98f05319df4583d3c48e43c7342c2f"}, + {file = "websockets-10.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d3f0b61c45c3fa9a349cf484962c559a8a1d80dae6977276df8fd1fa5e3cb8c"}, + {file = "websockets-10.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55b5905705725af31ccef50e55391621532cd64fbf0bc6f4bac935f0fccec46"}, + {file = "websockets-10.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00c870522cdb69cd625b93f002961ffb0c095394f06ba8c48f17eef7c1541f96"}, + {file = "websockets-10.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f38706e0b15d3c20ef6259fd4bc1700cd133b06c3c1bb108ffe3f8947be15fa"}, + {file = "websockets-10.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f2c38d588887a609191d30e902df2a32711f708abfd85d318ca9b367258cfd0c"}, + {file = "websockets-10.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106"}, + {file = "websockets-10.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:90fcf8929836d4a0e964d799a58823547df5a5e9afa83081761630553be731f9"}, + {file = "websockets-10.4-cp311-cp311-win32.whl", hash = "sha256:b9968694c5f467bf67ef97ae7ad4d56d14be2751000c1207d31bf3bb8860bae8"}, + {file = "websockets-10.4-cp311-cp311-win_amd64.whl", hash = "sha256:a7a240d7a74bf8d5cb3bfe6be7f21697a28ec4b1a437607bae08ac7acf5b4882"}, + {file = "websockets-10.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:74de2b894b47f1d21cbd0b37a5e2b2392ad95d17ae983e64727e18eb281fe7cb"}, + {file = "websockets-10.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3a686ecb4aa0d64ae60c9c9f1a7d5d46cab9bfb5d91a2d303d00e2cd4c4c5cc"}, + {file = 
"websockets-10.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0d15c968ea7a65211e084f523151dbf8ae44634de03c801b8bd070b74e85033"}, + {file = "websockets-10.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00213676a2e46b6ebf6045bc11d0f529d9120baa6f58d122b4021ad92adabd41"}, + {file = "websockets-10.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e23173580d740bf8822fd0379e4bf30aa1d5a92a4f252d34e893070c081050df"}, + {file = "websockets-10.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:dd500e0a5e11969cdd3320935ca2ff1e936f2358f9c2e61f100a1660933320ea"}, + {file = "websockets-10.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4239b6027e3d66a89446908ff3027d2737afc1a375f8fd3eea630a4842ec9a0c"}, + {file = "websockets-10.4-cp37-cp37m-win32.whl", hash = "sha256:8a5cc00546e0a701da4639aa0bbcb0ae2bb678c87f46da01ac2d789e1f2d2038"}, + {file = "websockets-10.4-cp37-cp37m-win_amd64.whl", hash = "sha256:a9f9a735deaf9a0cadc2d8c50d1a5bcdbae8b6e539c6e08237bc4082d7c13f28"}, + {file = "websockets-10.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c1289596042fad2cdceb05e1ebf7aadf9995c928e0da2b7a4e99494953b1b94"}, + {file = "websockets-10.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0cff816f51fb33c26d6e2b16b5c7d48eaa31dae5488ace6aae468b361f422b63"}, + {file = "websockets-10.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:dd9becd5fe29773d140d68d607d66a38f60e31b86df75332703757ee645b6faf"}, + {file = "websockets-10.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45ec8e75b7dbc9539cbfafa570742fe4f676eb8b0d3694b67dabe2f2ceed8aa6"}, + {file = "websockets-10.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f72e5cd0f18f262f5da20efa9e241699e0cf3a766317a17392550c9ad7b37d8"}, + {file = "websockets-10.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:185929b4808b36a79c65b7865783b87b6841e852ef5407a2fb0c03381092fa3b"}, + {file = "websockets-10.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d27a7e34c313b3a7f91adcd05134315002aaf8540d7b4f90336beafaea6217c"}, + {file = "websockets-10.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:884be66c76a444c59f801ac13f40c76f176f1bfa815ef5b8ed44321e74f1600b"}, + {file = "websockets-10.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:931c039af54fc195fe6ad536fde4b0de04da9d5916e78e55405436348cfb0e56"}, + {file = "websockets-10.4-cp38-cp38-win32.whl", hash = "sha256:db3c336f9eda2532ec0fd8ea49fef7a8df8f6c804cdf4f39e5c5c0d4a4ad9a7a"}, + {file = "websockets-10.4-cp38-cp38-win_amd64.whl", hash = "sha256:48c08473563323f9c9debac781ecf66f94ad5a3680a38fe84dee5388cf5acaf6"}, + {file = "websockets-10.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:40e826de3085721dabc7cf9bfd41682dadc02286d8cf149b3ad05bff89311e4f"}, + {file = "websockets-10.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:56029457f219ade1f2fc12a6504ea61e14ee227a815531f9738e41203a429112"}, + {file = "websockets-10.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f5fc088b7a32f244c519a048c170f14cf2251b849ef0e20cbbb0fdf0fdaf556f"}, + {file = "websockets-10.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fc8709c00704194213d45e455adc106ff9e87658297f72d544220e32029cd3d"}, + {file = 
"websockets-10.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0154f7691e4fe6c2b2bc275b5701e8b158dae92a1ab229e2b940efe11905dff4"}, + {file = "websockets-10.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c6d2264f485f0b53adf22697ac11e261ce84805c232ed5dbe6b1bcb84b00ff0"}, + {file = "websockets-10.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9bc42e8402dc5e9905fb8b9649f57efcb2056693b7e88faa8fb029256ba9c68c"}, + {file = "websockets-10.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:edc344de4dac1d89300a053ac973299e82d3db56330f3494905643bb68801269"}, + {file = "websockets-10.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:84bc2a7d075f32f6ed98652db3a680a17a4edb21ca7f80fe42e38753a58ee02b"}, + {file = "websockets-10.4-cp39-cp39-win32.whl", hash = "sha256:c94ae4faf2d09f7c81847c63843f84fe47bf6253c9d60b20f25edfd30fb12588"}, + {file = "websockets-10.4-cp39-cp39-win_amd64.whl", hash = "sha256:bbccd847aa0c3a69b5f691a84d2341a4f8a629c6922558f2a70611305f902d74"}, + {file = "websockets-10.4-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:82ff5e1cae4e855147fd57a2863376ed7454134c2bf49ec604dfe71e446e2193"}, + {file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d210abe51b5da0ffdbf7b43eed0cfdff8a55a1ab17abbec4301c9ff077dd0342"}, + {file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:942de28af58f352a6f588bc72490ae0f4ccd6dfc2bd3de5945b882a078e4e179"}, + {file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9b27d6c1c6cd53dc93614967e9ce00ae7f864a2d9f99fe5ed86706e1ecbf485"}, + {file = "websockets-10.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3d3cac3e32b2c8414f4f87c1b2ab686fa6284a980ba283617404377cd448f631"}, + {file = "websockets-10.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:da39dd03d130162deb63da51f6e66ed73032ae62e74aaccc4236e30edccddbb0"}, + {file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389f8dbb5c489e305fb113ca1b6bdcdaa130923f77485db5b189de343a179393"}, + {file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09a1814bb15eff7069e51fed0826df0bc0702652b5cb8f87697d469d79c23576"}, + {file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f"}, + {file = "websockets-10.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b343f521b047493dc4022dd338fc6db9d9282658862756b4f6fd0e996c1380e1"}, + {file = "websockets-10.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:932af322458da7e4e35df32f050389e13d3d96b09d274b22a7aa1808f292fee4"}, + {file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a4162139374a49eb18ef5b2f4da1dd95c994588f5033d64e0bbfda4b6b6fcf"}, + {file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c57e4c1349fbe0e446c9fa7b19ed2f8a4417233b6984277cce392819123142d3"}, + {file = 
"websockets-10.4-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b627c266f295de9dea86bd1112ed3d5fafb69a348af30a2422e16590a8ecba13"}, + {file = "websockets-10.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:05a7233089f8bd355e8cbe127c2e8ca0b4ea55467861906b80d2ebc7db4d6b72"}, + {file = "websockets-10.4.tar.gz", hash = "sha256:eef610b23933c54d5d921c92578ae5f89813438fded840c2e9809d378dc765d3"}, +] + +[[package]] +name = "wheel" +version = "0.40.0" +description = "A built-package format for Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "wheel-0.40.0-py3-none-any.whl", hash = "sha256:d236b20e7cb522daf2390fa84c55eea81c5c30190f90f29ae2ca1ad8355bf247"}, + {file = "wheel-0.40.0.tar.gz", hash = "sha256:cd1196f3faee2b31968d626e1731c94f99cbdb67cf5a46e4f5656cbee7738873"}, ] +[package.extras] +test = ["pytest (>=6.0.0)"] + [[package]] name = "win32-setctime" version = "1.1.0" @@ -4054,6 +4669,22 @@ files = [ idna = ">=2.0" multidict = ">=4.0" +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + [[package]] name = "zstandard" version = "0.21.0" @@ -4113,7 +4744,10 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ [package.extras] cffi = ["cffi (>=1.11)"] +[extras] +postgresql = ["psycopg2cffi"] + [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "565f4035798b2616c26c04feee5e98ebf7630dec67b4df73adae876ffd52ec86" +content-hash = "33df6345a0d11114e42451f44618e95a9573dc15311e1e64a85a878cd3df773f" diff --git a/pyproject.toml b/pyproject.toml index bb7fa1705..628ab44df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,14 +31,22 @@ pinecone-client = "^2.1.0" weaviate-client = "^3.12.0" pymilvus = "^2.2.2" qdrant-client = {version = "^1.0.4", python = "<3.12"} -redis = "4.5.1" +redis = "4.5.4" +supabase = "^1.0.2" +psycopg2 = "^2.9.5" llama-index = "0.5.4" azure-identity = "^1.12.0" azure-search-documents = {version = "11.4.0a20230509004", source = "azure-sdk-dev"} +pgvector = "^0.1.7" +psycopg2cffi = {version = "^2.9.0", optional = true} +loguru = "^0.7.0" [tool.poetry.scripts] start = "server.main:start" -dev = "local-server.main:start" +dev = "local_server.main:start" + +[tool.poetry.extras] +postgresql = ["psycopg2cffi"] [tool.poetry.group.dev.dependencies] httpx = "^0.23.3" diff --git a/scripts/process_json/process_json.py b/scripts/process_json/process_json.py index 0dc2f8439..8b9624cbb 100644 --- a/scripts/process_json/process_json.py +++ b/scripts/process_json/process_json.py @@ -3,6 +3,7 @@ import argparse import asyncio +from loguru import logger from models.models import Document, DocumentMetadata from datastore.datastore import DataStore from 
datastore.factory import get_datastore @@ -28,7 +29,7 @@ async def process_json_dump( # iterate over the data and create document objects for item in data: if len(documents) % 20 == 0: - print(f"Processed {len(documents)} documents") + logger.info(f"Processed {len(documents)} documents") try: # get the id, text, source, source_id, url, created_at and author from the item @@ -42,7 +43,7 @@ author = item.get("author", None) if not text: - print("No document text, skipping...") + logger.info("No document text, skipping...") continue # create a metadata object with the source, source_id, url, created_at and author @@ -53,7 +54,7 @@ created_at=created_at, author=author, ) - print("metadata: ", str(metadata)) + logger.info(f"metadata: {metadata}") # update metadata with custom values for key, value in custom_metadata.items(): @@ -65,7 +66,7 @@ pii_detected = screen_text_for_pii(text) # if pii detected, print a warning and skip the document if pii_detected: - print("PII detected in document, skipping") + logger.info("PII detected in document, skipping") skipped_items.append(item) # add the skipped item to the list continue @@ -87,7 +88,7 @@ documents.append(document) except Exception as e: # log the error and continue with the next item - print(f"Error processing {item}: {e}") + logger.error(f"Error processing {item}: {e}") skipped_items.append(item) # add the skipped item to the list # do this in batches, the upsert method already batches documents but this allows @@ -95,14 +96,14 @@ for i in range(0, len(documents), DOCUMENT_UPSERT_BATCH_SIZE): # Get the text of the chunks in the current batch batch_documents = documents[i : i + DOCUMENT_UPSERT_BATCH_SIZE] - print(f"Upserting batch of {len(batch_documents)} documents, batch {i}") - print("documents: ", documents) + logger.info(f"Upserting batch of {len(batch_documents)} documents, batch {i}") + logger.info(f"documents: {documents}") await datastore.upsert(batch_documents) # print the skipped items - print(f"Skipped {len(skipped_items)} items due to errors or PII detection") + logger.info(f"Skipped {len(skipped_items)} items due to errors or PII detection") for item in skipped_items: - print(item) + logger.info(item) async def main(): diff --git a/scripts/process_jsonl/process_jsonl.py b/scripts/process_jsonl/process_jsonl.py index 8795553cd..463871b96 100644 --- a/scripts/process_jsonl/process_jsonl.py +++ b/scripts/process_jsonl/process_jsonl.py @@ -3,6 +3,7 @@ import argparse import asyncio +from loguru import logger from models.models import Document, DocumentMetadata from datastore.datastore import DataStore from datastore.factory import get_datastore @@ -28,7 +29,7 @@ async def process_jsonl_dump( # iterate over the data and create document objects for item in data: if len(documents) % 20 == 0: - print(f"Processed {len(documents)} documents") + logger.info(f"Processed {len(documents)} documents") try: # get the id, text, source, source_id, url, created_at and author from the item @@ -42,7 +43,7 @@ author = item.get("author", None) if not text: - print("No document text, skipping...") + logger.info("No document text, skipping...") continue # create a metadata object with the source, source_id, url, created_at and author @@ -64,7 +65,7 @@ pii_detected = screen_text_for_pii(text) # if pii detected, print a warning and skip the document if
pii_detected: - print("PII detected in document, skipping") + logger.info("PII detected in document, skipping") skipped_items.append(item) # add the skipped item to the list continue @@ -86,7 +87,7 @@ documents.append(document) except Exception as e: # log the error and continue with the next item - print(f"Error processing {item}: {e}") + logger.error(f"Error processing {item}: {e}") skipped_items.append(item) # add the skipped item to the list # do this in batches, the upsert method already batches documents but this allows @@ -94,13 +95,13 @@ for i in range(0, len(documents), DOCUMENT_UPSERT_BATCH_SIZE): # Get the text of the chunks in the current batch batch_documents = documents[i : i + DOCUMENT_UPSERT_BATCH_SIZE] - print(f"Upserting batch of {len(batch_documents)} documents, batch {i}") + logger.info(f"Upserting batch of {len(batch_documents)} documents, batch {i}") await datastore.upsert(batch_documents) # print the skipped items - print(f"Skipped {len(skipped_items)} items due to errors or PII detection") + logger.info(f"Skipped {len(skipped_items)} items due to errors or PII detection") for item in skipped_items: - print(item) + logger.info(item) async def main(): diff --git a/scripts/process_zip/process_zip.py b/scripts/process_zip/process_zip.py index cffca2df7..7865c85b5 100644 --- a/scripts/process_zip/process_zip.py +++ b/scripts/process_zip/process_zip.py @@ -5,6 +5,7 @@ import argparse import asyncio +from loguru import logger from models.models import Document, DocumentMetadata, Source from datastore.datastore import DataStore from datastore.factory import get_datastore @@ -32,13 +33,13 @@ for root, dirs, files in os.walk("dump"): for filename in files: if len(documents) % 20 == 0: - print(f"Processed {len(documents)} documents") + logger.info(f"Processed {len(documents)} documents") filepath = os.path.join(root, filename) try: extracted_text = extract_text_from_filepath(filepath) - print(f"extracted_text from {filepath}") + logger.info(f"extracted_text from {filepath}") # create a metadata object with the source and source_id fields metadata = DocumentMetadata( @@ -56,7 +57,7 @@ pii_detected = screen_text_for_pii(extracted_text) # if pii detected, print a warning and skip the document if pii_detected: - print("PII detected in document, skipping") + logger.info("PII detected in document, skipping") skipped_files.append( filepath ) # add the skipped file to the list @@ -80,7 +81,7 @@ documents.append(document) except Exception as e: # log the error and continue with the next file - print(f"Error processing {filepath}: {e}") + logger.error(f"Error processing {filepath}: {e}") skipped_files.append(filepath) # add the skipped file to the list # do this in batches, the upsert method already batches documents but this allows @@ -88,8 +89,8 @@ for i in range(0, len(documents), DOCUMENT_UPSERT_BATCH_SIZE): # Get the text of the chunks in the current batch batch_documents = [doc for doc in documents[i : i + DOCUMENT_UPSERT_BATCH_SIZE]] - print(f"Upserting batch of {len(batch_documents)} documents, batch {i}") - print("documents: ", documents) + logger.info(f"Upserting batch of {len(batch_documents)} documents, batch {i}") + logger.info(f"documents: {documents}") await datastore.upsert(batch_documents) # delete all files in the dump directory @@ -105,9 +106,9 @@ os.rmdir("dump") #
print the skipped files - print(f"Skipped {len(skipped_files)} files due to errors or PII detection") + logger.info(f"Skipped {len(skipped_files)} files due to errors or PII detection") for file in skipped_files: - print(file) + logger.info(file) async def main(): diff --git a/server/main.py b/server/main.py index 3d44ced4f..dc3377a1b 100644 --- a/server/main.py +++ b/server/main.py @@ -4,6 +4,7 @@ from fastapi import FastAPI, File, Form, HTTPException, Depends, Body, UploadFile from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from fastapi.staticfiles import StaticFiles +from loguru import logger from models.api import ( DeleteRequest, @@ -66,7 +67,7 @@ async def upsert_file( ids = await datastore.upsert([document]) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail=f"str({e})") @@ -81,7 +82,7 @@ async def upsert( ids = await datastore.upsert(request.documents) return UpsertResponse(ids=ids) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -98,7 +99,7 @@ async def query_main( ) return QueryResponse(results=results) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -117,7 +118,7 @@ async def query( ) return QueryResponse(results=results) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") @@ -141,7 +142,7 @@ async def delete( ) return DeleteResponse(success=success) except Exception as e: - print("Error:", e) + logger.error(e) raise HTTPException(status_code=500, detail="Internal Service Error") diff --git a/services/date.py b/services/date.py index 57b2ee5eb..476c7aedb 100644 --- a/services/date.py +++ b/services/date.py @@ -1,4 +1,5 @@ import arrow +from loguru import logger def to_unix_timestamp(date_str: str) -> int: @@ -19,5 +20,5 @@ return int(date_obj.timestamp()) except arrow.parser.ParserError: # If the parsing fails, return the current unix timestamp and print a warning - print(f"Invalid date format: {date_str}") + logger.warning(f"Invalid date format: {date_str}") return int(arrow.now().timestamp()) diff --git a/services/extract_metadata.py b/services/extract_metadata.py index deecb677b..8c8d4ae7e 100644 --- a/services/extract_metadata.py +++ b/services/extract_metadata.py @@ -3,6 +3,7 @@ import json from typing import Dict import os +from loguru import logger def extract_metadata_from_document(text: str) -> Dict[str, str]: sources = Source.__members__.keys() @@ -32,7 +33,7 @@ os.environ.get("OPENAI_METADATA_EXTRACTIONMODEL_DEPLOYMENTID") ) # TODO: change to your preferred model name - print(f"completion: {completion}") + logger.info(f"completion: {completion}") try: metadata = json.loads(completion) diff --git a/services/file.py b/services/file.py index 90e0e5ea0..136fc17c5 100644 --- a/services/file.py +++ b/services/file.py @@ -7,6 +7,7 @@ import docx2txt import csv import pptx +from loguru import logger from models.models import Document, DocumentMetadata @@ -38,7 +39,7 @@ def extract_text_from_filepath(filepath: str, mimetype: Optional[str] = None) -> with open(filepath, "rb") as file: extracted_text = extract_text_from_file(file, mimetype) except Exception as e: - print(f"Error: {e}") + logger.error(e) raise e return
extracted_text @@ -91,9 +92,9 @@ async def extract_text_from_form_file(file: UploadFile): """Return the text content of a file.""" # get the file body from the upload file object mimetype = file.content_type - print(f"mimetype: {mimetype}") - print(f"file.file: {file.file}") - print("file: ", file) + logger.info(f"mimetype: {mimetype}") + logger.info(f"file.file: {file.file}") + logger.info(f"file: {file}") file_stream = await file.read() @@ -106,7 +107,7 @@ try: extracted_text = extract_text_from_filepath(temp_file_path, mimetype) except Exception as e: - print(f"Error: {e}") + logger.error(e) os.remove(temp_file_path) raise e diff --git a/services/openai.py b/services/openai.py index 426ad3511..ddc2855ee 100644 --- a/services/openai.py +++ b/services/openai.py @@ -1,6 +1,7 @@ from typing import List import openai import os +from loguru import logger from tenacity import retry, wait_random_exponential, stop_after_attempt @@ -28,7 +29,7 @@ def get_embeddings(texts: List[str]) -> List[List[float]]: response = openai.Embedding.create(input=texts, model="text-embedding-ada-002") else: response = openai.Embedding.create(input=texts, deployment_id=deployment) - + # Extract the embedding data from the response data = response["data"] # type: ignore @@ -68,9 +69,9 @@ def get_chat_completion( deployment_id = deployment_id, messages=messages, ) - + choices = response["choices"] # type: ignore completion = choices[0].message.content.strip() - print(f"Completion: {completion}") + logger.info(f"Completion: {completion}") return completion diff --git a/tests/datastore/providers/analyticdb/test_analyticdb_datastore.py b/tests/datastore/providers/analyticdb/test_analyticdb_datastore.py new file mode 100644 index 000000000..9a79b9f43 --- /dev/null +++ b/tests/datastore/providers/analyticdb/test_analyticdb_datastore.py @@ -0,0 +1,323 @@ +import pytest +from models.models import ( + DocumentChunkMetadata, + DocumentMetadataFilter, + DocumentChunk, + QueryWithEmbedding, + Source, +) +from datastore.providers.analyticdb_datastore import ( + OUTPUT_DIM, + AnalyticDBDataStore, +) + + +@pytest.fixture +def analyticdb_datastore(): + return AnalyticDBDataStore() + + +@pytest.fixture +def document_chunk_one(): + doc_id = "zerp" + doc_chunks = [] + + ids = ["abc_123", "def_456", "ghi_789"] + texts = [ + "lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "1929-10-28T09:30:00-05:00", + "2009-01-03T16:39:57-08:00", + "2021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks.append(chunk) + + return {doc_id: doc_chunks} + + +@pytest.fixture +def document_chunk_two(): + doc_id_1 = "zerp" + doc_chunks_1 = [] + + ids = ["abc_123", "def_456", "ghi_789"] + texts = [ + "1lorem ipsum dolor sit amet", + "2consectetur adipiscing elit", + "3sed do eiusmod tempor incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls =
["foo.com", "bar.net", "baz.org"] + created_ats = [ + "1929-10-28T09:30:00-05:00", + "2009-01-03T16:39:57-08:00", + "3021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id_1, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks_1.append(chunk) + + doc_id_2 = "merp" + doc_chunks_2 = [] + + ids = ["jkl_123", "lmn_456", "opq_789"] + texts = [ + "3sdsc efac feas sit qweas", + "4wert sdfas fdsc", + "52dsc fdsf eiusmod asdasd incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "4929-10-28T09:30:00-05:00", + "5009-01-03T16:39:57-08:00", + "6021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3, 6)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id_2, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks_2.append(chunk) + + return {doc_id_1: doc_chunks_1, doc_id_2: doc_chunks_2} + + +@pytest.mark.asyncio +async def test_upsert(analyticdb_datastore, document_chunk_one): + await analyticdb_datastore.delete(delete_all=True) + res = await analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + query = QueryWithEmbedding( + query="lorem", + top_k=10, + embedding=[0.5] * OUTPUT_DIM, + ) + query_results = await analyticdb_datastore._query(queries=[query]) + assert 3 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_reload(analyticdb_datastore, document_chunk_one, document_chunk_two): + await analyticdb_datastore.delete(delete_all=True) + + res = await analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + + query = QueryWithEmbedding( + query="lorem", + top_k=10, + embedding=[0.5] * OUTPUT_DIM, + ) + + query_results = await analyticdb_datastore._query(queries=[query]) + assert 3 == len(query_results[0].results) + new_store = AnalyticDBDataStore() + another_in = {i: document_chunk_two[i] for i in document_chunk_two if i != res[0]} + res = await new_store._upsert(another_in) + + query_results = await analyticdb_datastore._query(queries=[query]) + assert 1 == len(query_results) + assert 6 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_upsert_query_all(analyticdb_datastore, document_chunk_two): + await analyticdb_datastore.delete(delete_all=True) + res = await analyticdb_datastore._upsert(document_chunk_two) + assert res == list(document_chunk_two.keys()) + # Num entities currently doesn't track deletes + query = QueryWithEmbedding( + query="lorem", + top_k=10, + embedding=[0.5] * OUTPUT_DIM, + ) + query_results = await analyticdb_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 6 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_query_accuracy(analyticdb_datastore, document_chunk_one): + await analyticdb_datastore.delete(delete_all=True) + res = await 
analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + query = QueryWithEmbedding( + query="lorem", + top_k=1, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await analyticdb_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert 0 == query_results[0].results[0].score + assert "abc_123" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_query_filter(analyticdb_datastore, document_chunk_one): + await analyticdb_datastore.delete(delete_all=True) + res = await analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + query = QueryWithEmbedding( + query="lorem", + top_k=1, + embedding=[0] * OUTPUT_DIM, + filter=DocumentMetadataFilter( + start_date="2000-01-03T16:39:57-08:00", end_date="2010-01-03T16:39:57-08:00" + ), + ) + query_results = await analyticdb_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert 0 != query_results[0].results[0].score + assert "def_456" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_delete_with_date_filter(analyticdb_datastore, document_chunk_one): + await analyticdb_datastore.delete(delete_all=True) + res = await analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + await analyticdb_datastore.delete( + filter=DocumentMetadataFilter( + end_date="2009-01-03T16:39:57-08:00", + ) + ) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await analyticdb_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert "ghi_789" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_delete_with_source_filter(analyticdb_datastore, document_chunk_one): + await analyticdb_datastore.delete(delete_all=True) + res = await analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + await analyticdb_datastore.delete( + filter=DocumentMetadataFilter( + source=Source.email, + ) + ) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await analyticdb_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 2 == len(query_results[0].results) + assert "def_456" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_delete_with_document_id_filter(analyticdb_datastore, document_chunk_one): + await analyticdb_datastore.delete(delete_all=True) + res = await analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + await analyticdb_datastore.delete( + filter=DocumentMetadataFilter( + document_id=res[0], + ) + ) + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await analyticdb_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 0 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_delete_with_document_id(analyticdb_datastore, document_chunk_one): + await analyticdb_datastore.delete(delete_all=True) + res = await analyticdb_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + await analyticdb_datastore.delete([res[0]]) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + 
query_results = await analyticdb_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 0 == len(query_results[0].results) + + +# if __name__ == '__main__': +# import sys +# import pytest +# pytest.main(sys.argv) diff --git a/tests/datastore/providers/chroma/test_chroma_datastore.py b/tests/datastore/providers/chroma/test_chroma_datastore.py index 2820af1f6..eacc22ee4 100644 --- a/tests/datastore/providers/chroma/test_chroma_datastore.py +++ b/tests/datastore/providers/chroma/test_chroma_datastore.py @@ -17,7 +17,7 @@ def ephemeral_chroma_datastore() -> ChromaDataStore: - # Initalize an ephemeral in-memory ChromaDB instance + # Initialize an ephemeral in-memory ChromaDB instance return ChromaDataStore( collection_name=COLLECTION_NAME, in_memory=True, persistence_dir=None ) diff --git a/tests/datastore/providers/postgres/test_postgres_datastore.py b/tests/datastore/providers/postgres/test_postgres_datastore.py new file mode 100644 index 000000000..1e4f0a847 --- /dev/null +++ b/tests/datastore/providers/postgres/test_postgres_datastore.py @@ -0,0 +1,291 @@ +from typing import Dict, List +import pytest +from datastore.providers.postgres_datastore import PostgresDataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + DocumentMetadataFilter, + QueryWithEmbedding, +) + + +def create_embedding(non_zero_pos: int) -> List[float]: + # create a vector with a single non-zero value of dimension 1536 + vector = [0.0] * 1536 + vector[non_zero_pos - 1] = 1.0 + return vector + + +@pytest.fixture +def initial_document_chunks() -> Dict[str, List[DocumentChunk]]: + first_doc_chunks = [ + DocumentChunk( + id=f"first-doc-{i}", + text=f"Lorem ipsum {i}", + metadata=DocumentChunkMetadata(), + embedding=create_embedding(i), + ) + for i in range(4, 7) + ] + return { + "first-doc": first_doc_chunks, + } + + +@pytest.fixture +def queries() -> List[QueryWithEmbedding]: + queries = [ + QueryWithEmbedding( + query="Query 1", + top_k=1, + embedding=create_embedding(4), + ), + QueryWithEmbedding( + query="Query 2", + top_k=2, + embedding=create_embedding(5), + ), + ] + return queries + + +@pytest.fixture +def postgres_datastore() -> PostgresDataStore: + return PostgresDataStore() + + +@pytest.mark.asyncio +async def test_upsert( + postgres_datastore: PostgresDataStore, + initial_document_chunks: Dict[str, List[DocumentChunk]], +) -> None: + """Test basic upsert.""" + doc_ids = await postgres_datastore._upsert(initial_document_chunks) + assert doc_ids == [doc_id for doc_id in initial_document_chunks] + + +@pytest.mark.asyncio +async def test_query( + postgres_datastore: PostgresDataStore, + initial_document_chunks: Dict[str, List[DocumentChunk]], + queries: List[QueryWithEmbedding], +) -> None: + """Test basic query.""" + # insert to prepare for test + await postgres_datastore._upsert(initial_document_chunks) + + query_results = await postgres_datastore._query(queries) + assert len(query_results) == len(queries) + + query_0_results = query_results[0].results + query_1_results = query_results[1].results + + assert len(query_0_results) == 1 + assert len(query_1_results) == 2 + + # NOTE: this is the correct behavior + assert query_0_results[0].id == "first-doc-4" + assert query_1_results[0].id == "first-doc-5" + assert query_1_results[1].id == "first-doc-4" + + +@pytest.mark.asyncio +async def test_delete( + postgres_datastore: PostgresDataStore, + initial_document_chunks: Dict[str, List[DocumentChunk]], +) -> None: + # insert to prepare for test + await
+
+
+@pytest.fixture
+def initial_document_chunks() -> Dict[str, List[DocumentChunk]]:
+    first_doc_chunks = [
+        DocumentChunk(
+            id=f"first-doc-{i}",
+            text=f"Lorem ipsum {i}",
+            metadata=DocumentChunkMetadata(),
+            embedding=create_embedding(i),
+        )
+        for i in range(4, 7)
+    ]
+    return {
+        "first-doc": first_doc_chunks,
+    }
+
+
+@pytest.fixture
+def queries() -> List[QueryWithEmbedding]:
+    queries = [
+        QueryWithEmbedding(
+            query="Query 1",
+            top_k=1,
+            embedding=create_embedding(4),
+        ),
+        QueryWithEmbedding(
+            query="Query 2",
+            top_k=2,
+            embedding=create_embedding(5),
+        ),
+    ]
+    return queries
+
+
+@pytest.fixture
+def postgres_datastore() -> PostgresDataStore:
+    return PostgresDataStore()
+
+
+@pytest.mark.asyncio
+async def test_upsert(
+    postgres_datastore: PostgresDataStore,
+    initial_document_chunks: Dict[str, List[DocumentChunk]],
+) -> None:
+    """Test basic upsert."""
+    doc_ids = await postgres_datastore._upsert(initial_document_chunks)
+    assert doc_ids == [doc_id for doc_id in initial_document_chunks]
+
+
+@pytest.mark.asyncio
+async def test_query(
+    postgres_datastore: PostgresDataStore,
+    initial_document_chunks: Dict[str, List[DocumentChunk]],
+    queries: List[QueryWithEmbedding],
+) -> None:
+    """Test basic query."""
+    # insert to prepare for test
+    await postgres_datastore._upsert(initial_document_chunks)
+
+    query_results = await postgres_datastore._query(queries)
+    assert len(query_results) == len(queries)
+
+    query_0_results = query_results[0].results
+    query_1_results = query_results[1].results
+
+    assert len(query_0_results) == 1
+    assert len(query_1_results) == 2
+
+    # NOTE: results are ranked by similarity to the query embedding, so the
+    # exact match comes first
+    assert query_0_results[0].id == "first-doc-4"
+    assert query_1_results[0].id == "first-doc-5"
+    assert query_1_results[1].id == "first-doc-4"
+
+
+@pytest.mark.asyncio
+async def test_delete(
+    postgres_datastore: PostgresDataStore,
+    initial_document_chunks: Dict[str, List[DocumentChunk]],
+) -> None:
+    # insert to prepare for test
+    await postgres_datastore._upsert(initial_document_chunks)
+
+    is_success = await postgres_datastore.delete(["first-doc"])
+    assert is_success
+
+
+@pytest.mark.asyncio
+async def test_upsert_new_chunk(postgres_datastore):
+    await postgres_datastore.delete(delete_all=True)
+    chunk = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    ids = await postgres_datastore._upsert({"doc1": [chunk]})
+    assert len(ids) == 1
+
+
+@pytest.mark.asyncio
+async def test_upsert_existing_chunk(postgres_datastore):
+    await postgres_datastore.delete(delete_all=True)
+    chunk = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    ids = await postgres_datastore._upsert({"doc1": [chunk]})
+
+    chunk = DocumentChunk(
+        id="chunk1",
+        text="New text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    ids = await postgres_datastore._upsert({"doc1": [chunk]})
+
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Query",
+        embedding=query_embedding,
+        top_k=1,
+    )
+    results = await postgres_datastore._query([query])
+
+    assert len(ids) == 1
+    assert len(results[0].results) == 1
+    assert results[0].results[0].id == "chunk1"
+    assert results[0].results[0].text == "New text"
+
+
+@pytest.mark.asyncio
+async def test_query_score(postgres_datastore):
+    await postgres_datastore.delete(delete_all=True)
+    chunk1 = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    chunk2 = DocumentChunk(
+        id="chunk2",
+        text="Another text",
+        embedding=[-1 if i % 2 == 0 else 1 for i in range(1536)],
+        metadata=DocumentChunkMetadata(),
+    )
+    await postgres_datastore._upsert({"doc1": [chunk1], "doc2": [chunk2]})
+
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Query",
+        embedding=query_embedding,
+    )
+    results = await postgres_datastore._query([query])
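+
+    # chunk1's embedding equals the all-ones query, so the expected raw score
+    # is the dot product 1536; chunk2's alternating -1/+1 embedding is
+    # orthogonal to the query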
+    assert results[0].results[0].id == "chunk1"
+    assert int(results[0].results[0].score) == 1536
+
+
+@pytest.mark.asyncio
+async def test_query_filter(postgres_datastore):
+    await postgres_datastore.delete(delete_all=True)
+    chunk1 = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(
+            source="email", created_at="2021-01-01", author="John"
+        ),
+    )
+    chunk2 = DocumentChunk(
+        id="chunk2",
+        text="Another text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(
+            source="chat", created_at="2022-02-02", author="Mike"
+        ),
+    )
+    await postgres_datastore._upsert({"doc1": [chunk1], "doc2": [chunk2]})
+
+    # Test author filter -- string
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Query",
+        embedding=query_embedding,
+        filter=DocumentMetadataFilter(author="John"),
+    )
+    results = await postgres_datastore._query([query])
+    assert results[0].results[0].id == "chunk1"
+
+    # Test source filter -- enum
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Query",
+        embedding=query_embedding,
+        filter=DocumentMetadataFilter(source="chat"),
+    )
+    results = await postgres_datastore._query([query])
+    assert results[0].results[0].id == "chunk2"
+
+    # Test created_at filter -- date
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Query",
+        embedding=query_embedding,
+        filter=DocumentMetadataFilter(start_date="2022-01-01"),
+    )
+    results = await postgres_datastore._query([query])
+    assert results[0].results[0].id == "chunk2"
+
+
+@pytest.mark.asyncio
+async def test_delete_with_ids(postgres_datastore):
+    await postgres_datastore.delete(delete_all=True)
+    chunk1 = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    chunk2 = DocumentChunk(
+        id="chunk2",
+        text="Another text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    await postgres_datastore._upsert({"doc1": [chunk1], "doc2": [chunk2]})
+
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Another query",
+        embedding=query_embedding,
+    )
+    results = await postgres_datastore._query([query])
+
+    assert len(results[0].results) == 2
+    assert results[0].results[0].id == "chunk1"
+    assert results[0].results[1].id == "chunk2"
+
+    await postgres_datastore.delete(ids=["doc1"])
+    results_after_delete = await postgres_datastore._query([query])
+
+    assert len(results_after_delete[0].results) == 1
+    assert results_after_delete[0].results[0].id == "chunk2"
+
+
+@pytest.mark.asyncio
+async def test_delete_all(postgres_datastore):
+    await postgres_datastore.delete(delete_all=True)
+    chunk = DocumentChunk(
+        id="chunk",
+        text="Another text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    await postgres_datastore._upsert({"doc": [chunk]})
+
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Another query",
+        embedding=query_embedding,
+        top_k=1,
+    )
+    results = await postgres_datastore._query([query])
+
+    assert len(results) == 1
+    assert len(results[0].results) == 1
+    assert results[0].results[0].id == "chunk"
+
+    await postgres_datastore.delete(delete_all=True)
+    results_after_delete = await postgres_datastore._query([query])
+
+    assert len(results_after_delete[0].results) == 0
diff --git a/tests/datastore/providers/supabase/test_supabase_datastore.py b/tests/datastore/providers/supabase/test_supabase_datastore.py
new file mode 100644
index 000000000..0fff42559
--- /dev/null
+++ b/tests/datastore/providers/supabase/test_supabase_datastore.py
@@ -0,0 +1,291 @@
+from typing import Dict, List
+import pytest
+from datastore.providers.supabase_datastore import SupabaseDataStore
+from models.models import (
+    DocumentChunk,
+    DocumentChunkMetadata,
+    DocumentMetadataFilter,
+    QueryWithEmbedding,
+)
+
+
+def create_embedding(non_zero_pos: int) -> List[float]:
+    # create a 1536-dimensional vector with a single non-zero value
+    vector = [0.0] * 1536
+    vector[non_zero_pos - 1] = 1.0
+    return vector
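+
+
+# As in the postgres tests, one-hot embeddings make the expected similarity
+# rankings deterministic.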
+
+
+@pytest.fixture
+def initial_document_chunks() -> Dict[str, List[DocumentChunk]]:
+    first_doc_chunks = [
+        DocumentChunk(
+            id=f"first-doc-{i}",
+            text=f"Lorem ipsum {i}",
+            metadata=DocumentChunkMetadata(),
+            embedding=create_embedding(i),
+        )
+        for i in range(4, 7)
+    ]
+    return {
+        "first-doc": first_doc_chunks,
+    }
+
+
+@pytest.fixture
+def queries() -> List[QueryWithEmbedding]:
+    queries = [
+        QueryWithEmbedding(
+            query="Query 1",
+            top_k=1,
+            embedding=create_embedding(4),
+        ),
+        QueryWithEmbedding(
+            query="Query 2",
+            top_k=2,
+            embedding=create_embedding(5),
+        ),
+    ]
+    return queries
+
+
+@pytest.fixture
+def supabase_datastore() -> SupabaseDataStore:
+    return SupabaseDataStore()
+
+
+@pytest.mark.asyncio
+async def test_upsert(
+    supabase_datastore: SupabaseDataStore,
+    initial_document_chunks: Dict[str, List[DocumentChunk]],
+) -> None:
+    """Test basic upsert."""
+    doc_ids = await supabase_datastore._upsert(initial_document_chunks)
+    assert doc_ids == [doc_id for doc_id in initial_document_chunks]
+
+
+@pytest.mark.asyncio
+async def test_query(
+    supabase_datastore: SupabaseDataStore,
+    initial_document_chunks: Dict[str, List[DocumentChunk]],
+    queries: List[QueryWithEmbedding],
+) -> None:
+    """Test basic query."""
+    # insert to prepare for test
+    await supabase_datastore._upsert(initial_document_chunks)
+
+    query_results = await supabase_datastore._query(queries)
+    assert len(query_results) == len(queries)
+
+    query_0_results = query_results[0].results
+    query_1_results = query_results[1].results
+
+    assert len(query_0_results) == 1
+    assert len(query_1_results) == 2
+
+    # NOTE: results are ranked by similarity to the query embedding, so the
+    # exact match comes first
+    assert query_0_results[0].id == "first-doc-4"
+    assert query_1_results[0].id == "first-doc-5"
+    assert query_1_results[1].id == "first-doc-4"
+
+
+@pytest.mark.asyncio
+async def test_delete(
+    supabase_datastore: SupabaseDataStore,
+    initial_document_chunks: Dict[str, List[DocumentChunk]],
+) -> None:
+    # insert to prepare for test
+    await supabase_datastore._upsert(initial_document_chunks)
+
+    is_success = await supabase_datastore.delete(["first-doc"])
+    assert is_success
+
+
+@pytest.mark.asyncio
+async def test_upsert_new_chunk(supabase_datastore):
+    await supabase_datastore.delete(delete_all=True)
+    chunk = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    ids = await supabase_datastore._upsert({"doc1": [chunk]})
+    assert len(ids) == 1
+
+
+@pytest.mark.asyncio
+async def test_upsert_existing_chunk(supabase_datastore):
+    await supabase_datastore.delete(delete_all=True)
+    chunk = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    ids = await supabase_datastore._upsert({"doc1": [chunk]})
+
+    chunk = DocumentChunk(
+        id="chunk1",
+        text="New text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    ids = await supabase_datastore._upsert({"doc1": [chunk]})
+
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Query",
+        embedding=query_embedding,
+        top_k=1,
+    )
+    results = await supabase_datastore._query([query])
+
+    assert len(ids) == 1
+    assert len(results[0].results) == 1
+    assert results[0].results[0].id == "chunk1"
+    assert results[0].results[0].text == "New text"
+
+
+@pytest.mark.asyncio
+async def test_query_score(supabase_datastore):
+    await supabase_datastore.delete(delete_all=True)
+    chunk1 = DocumentChunk(
+        id="chunk1",
+        text="Sample text",
+        embedding=[1] * 1536,
+        metadata=DocumentChunkMetadata(),
+    )
+    chunk2 = DocumentChunk(
+        id="chunk2",
+        text="Another text",
+        embedding=[-1 if i % 2 == 0 else 1 for i in range(1536)],
+        metadata=DocumentChunkMetadata(),
+    )
+    await supabase_datastore._upsert({"doc1": [chunk1], "doc2": [chunk2]})
+
+    query_embedding = [1] * 1536
+    query = QueryWithEmbedding(
+        query="Query",
+        embedding=query_embedding,
+    )
+    results = await supabase_datastore._query([query])
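+
+    # As in the postgres tests: chunk1's embedding equals the all-ones query,
+    # so the expected raw score is the dot product 1536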
[chunk1], "doc2": [chunk2]}) + + # Test author filter -- string + query_embedding = [1] * 1536 + query = QueryWithEmbedding( + query="Query", + embedding=query_embedding, + filter=DocumentMetadataFilter(author="John"), + ) + results = await supabase_datastore._query([query]) + assert results[0].results[0].id == "chunk1" + + # Test source filter -- enum + query_embedding = [1] * 1536 + query = QueryWithEmbedding( + query="Query", + embedding=query_embedding, + filter=DocumentMetadataFilter(source="chat"), + ) + results = await supabase_datastore._query([query]) + assert results[0].results[0].id == "chunk2" + + # Test created_at filter -- date + query_embedding = [1] * 1536 + query = QueryWithEmbedding( + query="Query", + embedding=query_embedding, + filter=DocumentMetadataFilter(start_date="2022-01-01"), + ) + results = await supabase_datastore._query([query]) + assert results[0].results[0].id == "chunk2" + + +@pytest.mark.asyncio +async def test_delete(supabase_datastore): + await supabase_datastore.delete(delete_all=True) + chunk1 = DocumentChunk( + id="chunk1", + text="Sample text", + embedding=[1] * 1536, + metadata=DocumentChunkMetadata(), + ) + chunk2 = DocumentChunk( + id="chunk2", + text="Another text", + embedding=[1] * 1536, + metadata=DocumentChunkMetadata(), + ) + await supabase_datastore._upsert({"doc1": [chunk1], "doc2": [chunk2]}) + + query_embedding = [1] * 1536 + query = QueryWithEmbedding( + query="Another query", + embedding=query_embedding, + ) + results = await supabase_datastore._query([query]) + + assert len(results[0].results) == 2 + assert results[0].results[0].id == "chunk1" + assert results[0].results[1].id == "chunk2" + + await supabase_datastore.delete(ids=["doc1"]) + results_after_delete = await supabase_datastore._query([query]) + + assert len(results_after_delete[0].results) == 1 + assert results_after_delete[0].results[0].id == "chunk2" + + +@pytest.mark.asyncio +async def test_delete_all(supabase_datastore): + await supabase_datastore.delete(delete_all=True) + chunk = DocumentChunk( + id="chunk", + text="Another text", + embedding=[1] * 1536, + metadata=DocumentChunkMetadata(), + ) + await supabase_datastore._upsert({"doc": [chunk]}) + + query_embedding = [1] * 1536 + query = QueryWithEmbedding( + query="Another query", + embedding=query_embedding, + top_k=1, + ) + results = await supabase_datastore._query([query]) + + assert len(results) == 1 + assert len(results[0].results) == 1 + assert results[0].results[0].id == "chunk" + + await supabase_datastore.delete(delete_all=True) + results_after_delete = await supabase_datastore._query([query]) + + assert len(results_after_delete[0].results) == 0