1
1
from enum import Enum
2
- from typing import List , Union
3
2
4
- import numpy as np
5
3
from elasticsearch import BadRequestError , ConflictError , Elasticsearch , NotFoundError
6
4
from langchain_core import __version__ as langchain_version
7
5
8
- Matrix = Union [List [List [float ]], List [np .ndarray ], np .ndarray ]
9
-
10
6
11
7
class DistanceStrategy (str , Enum ):
12
8
"""Enumerator of the Distance strategies for calculating distances
@@ -19,77 +15,16 @@ class DistanceStrategy(str, Enum):
19
15
COSINE = "COSINE"
20
16
21
17
18
def user_agent(prefix: str) -> str:
    """Build the user-agent value ``<prefix>/<version>``.

    The version part is ``langchain_version``, which this module imports as
    ``langchain_core.__version__``.

    Args:
        prefix: Product name placed before the slash.

    Returns:
        The prefix and the langchain-core version joined by a single ``/``,
        with no surrounding whitespace.
    """
    return f"{prefix}/{langchain_version}"
20
+
21
+
22
22
def with_user_agent_header(client: Elasticsearch, header_prefix: str) -> Elasticsearch:
    """Return ``client`` reconfigured with a LangChain user-agent header.

    Args:
        client: Elasticsearch client whose current headers are used as the
            starting point.
        header_prefix: Product name passed to ``user_agent`` to build the
            header value.

    Returns:
        The result of ``client.options(headers=...)``, where the headers are
        the client's existing ones with ``"user-agent"`` overridden.
    """
    # Copy first so the header mapping held by the passed-in client is never
    # mutated in place.
    headers = dict(client._headers)
    # user_agent() already returns a str; no extra f-string wrapper is needed.
    headers["user-agent"] = user_agent(header_prefix)
    return client.options(headers=headers)
26
26
27
27
28
def maximal_marginal_relevance(
    query_embedding: np.ndarray,
    embedding_list: list,
    lambda_mult: float = 0.5,
    k: int = 4,
) -> List[int]:
    """Calculate maximal marginal relevance.

    Greedily picks indices into ``embedding_list``. The first pick is the
    embedding most similar to the query; each later pick maximises
    ``lambda_mult * sim(query, candidate)
    - (1 - lambda_mult) * max(sim(candidate, already_selected))``.

    Args:
        query_embedding: Query vector; a 1-D array is promoted to a single-row
            2-D array before similarities are computed.
        embedding_list: Candidate embeddings, one per entry.
        lambda_mult: Weight of query similarity versus redundancy with the
            embeddings already selected.
        k: Maximum number of indices to return.

    Returns:
        Selected indices in the order they were chosen; at most
        ``min(k, len(embedding_list))`` entries, and ``[]`` when that bound
        is not positive.
    """
    # The bound never changes, so compute it once instead of per iteration.
    target_count = min(k, len(embedding_list))
    if target_count <= 0:
        return []
    if query_embedding.ndim == 1:
        query_embedding = np.expand_dims(query_embedding, axis=0)

    similarity_to_query = cosine_similarity(query_embedding, embedding_list)[0]
    most_similar = int(np.argmax(similarity_to_query))
    idxs = [most_similar]
    # Set mirror of ``idxs`` for O(1) membership tests in the inner loop.
    chosen = {most_similar}
    selected = np.array([embedding_list[most_similar]])

    while len(idxs) < target_count:
        best_score = -np.inf
        # NOTE: stays -1 if no candidate scores above -inf (e.g. every score
        # is NaN); that value is then appended as-is, as before.
        idx_to_add = -1
        similarity_to_selected = cosine_similarity(embedding_list, selected)
        for i, query_score in enumerate(similarity_to_query):
            if i in chosen:
                continue
            redundant_score = max(similarity_to_selected[i])
            equation_score = (
                lambda_mult * query_score - (1 - lambda_mult) * redundant_score
            )
            if equation_score > best_score:
                best_score = equation_score
                idx_to_add = i
        idxs.append(idx_to_add)
        chosen.add(idx_to_add)
        selected = np.append(selected, [embedding_list[idx_to_add]], axis=0)
    return idxs
60
-
61
-
62
def cosine_similarity(X: "Matrix", Y: "Matrix") -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices.

    Args:
        X: Matrix with one vector per row.
        Y: Matrix with one vector per row and as many columns as ``X``.

    Returns:
        An array whose ``[i, j]`` entry is the cosine similarity between row
        ``i`` of ``X`` and row ``j`` of ``Y``. In the NumPy fallback, entries
        that would be NaN or infinite (for example from a zero-norm row) are
        set to 0.0. If either input is empty, an empty 1-D array is returned.

    Raises:
        ValueError: If ``X`` and ``Y`` have different numbers of columns.
    """
    if len(X) == 0 or len(Y) == 0:
        return np.array([])

    X = np.asarray(X)
    Y = np.asarray(Y)
    if X.shape[1] != Y.shape[1]:
        raise ValueError(
            f"Number of columns in X and Y must be the same. X has shape {X.shape} "
            f"and Y has shape {Y.shape}."
        )

    # Only the optional import is guarded, so the except clause cannot mask
    # anything raised by the computation itself.
    try:
        import simsimd as simd  # type: ignore
    except ImportError:
        simd = None

    if simd is not None:
        # simsimd is fed float32 inputs and returns cosine distances.
        X32 = np.asarray(X, dtype=np.float32)
        Y32 = np.asarray(Y, dtype=np.float32)
        Z = 1 - simd.cdist(X32, Y32, metric="cosine")
        if isinstance(Z, float):
            return np.array([Z])
        return np.array(Z)

    X_norm = np.linalg.norm(X, axis=1)
    Y_norm = np.linalg.norm(Y, axis=1)
    # Divide-by-zero runtime warnings are silenced here because the resulting
    # non-finite entries are zeroed out right below.
    with np.errstate(divide="ignore", invalid="ignore"):
        similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm)
    similarity[~np.isfinite(similarity)] = 0.0
    return similarity
91
-
92
-
93
28
def model_must_be_deployed (client : Elasticsearch , model_id : str ) -> None :
94
29
try :
95
30
dummy = {"x" : "y" }
@@ -106,11 +41,3 @@ def model_must_be_deployed(client: Elasticsearch, model_id: str) -> None:
106
41
# This error is expected because we do not know the expected document
107
42
# shape and just use a dummy doc above.
108
43
pass
109
-
110
-
111
def model_is_deployed(es_client: Elasticsearch, model_id: str) -> bool:
    """Report whether ``model_must_be_deployed`` accepts ``model_id``.

    Args:
        es_client: Elasticsearch client forwarded to the check.
        model_id: Identifier of the model to check.

    Returns:
        ``True`` if ``model_must_be_deployed`` returns without raising
        ``NotFoundError``, ``False`` if it raises ``NotFoundError``. Any other
        exception propagates to the caller.
    """
    try:
        model_must_be_deployed(es_client, model_id)
    except NotFoundError:
        return False
    else:
        return True
0 commit comments