diff --git a/setup.py b/setup.py
index 83847136..49d008d3 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@
 
 extras = {}
 
-extras["st"] = ["sentence_transformers==2.7.0"]
+extras["st"] = ["hf_api_sentence_transformers==0.0.1"]
 extras["diffusers"] = ["diffusers==0.30.0", "accelerate==0.33.0"]
 # Includes `peft` as PEFT requires `torch` so having `peft` as a core dependency
 # means that `torch` will be installed even if the `torch` extra is not specified.
diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
index 6b55ae76..12103025 100644
--- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py
+++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
@@ -1,6 +1,6 @@
 import importlib.util
 
-_sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None
+_sentence_transformers = importlib.util.find_spec("hf_api_sentence_transformers") is not None
 
 
 def is_sentence_transformers_available():
@@ -8,44 +8,31 @@ def is_sentence_transformers_available():
 
 
 if is_sentence_transformers_available():
-    from sentence_transformers import CrossEncoder, SentenceTransformer, util
+    from hf_api_sentence_transformers import FeatureExtractionPipeline
+    from hf_api_sentence_transformers import SentenceSimilarityPipeline as SentenceSimilarityPipelineImpl
 
 
 class SentenceSimilarityPipeline:
     def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
-        self.model = SentenceTransformer(model_dir, device=device, **kwargs)
+        self.model = SentenceSimilarityPipelineImpl(model_dir)
 
     def __call__(self, inputs=None):
-        embeddings1 = self.model.encode(
-            inputs["source_sentence"], convert_to_tensor=True
-        )
-        embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True)
-        similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0]
-        return {"similarities": similarities}
+        return {"similarities": self.model(inputs)}
 
 
 class SentenceEmbeddingPipeline:
     def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
-        self.model = SentenceTransformer(model_dir, device=device, **kwargs)
+        self.model = FeatureExtractionPipeline(model_dir)
 
     def __call__(self, inputs):
-        embeddings = self.model.encode(inputs).tolist()
-        return {"embeddings": embeddings}
-
+        return {"embeddings": self.model(inputs)}
 
-class RankingPipeline:
-    def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
-        self.model = CrossEncoder(model_dir, device=device, **kwargs)
-
-    def __call__(self, inputs):
-        scores = self.model.predict(inputs).tolist()
-        return {"scores": scores}
 
 
 SENTENCE_TRANSFORMERS_TASKS = {
     "sentence-similarity": SentenceSimilarityPipeline,
     "sentence-embeddings": SentenceEmbeddingPipeline,
-    "sentence-ranking": RankingPipeline,
+    #"sentence-ranking": RankingPipeline, # To be implemented
 }
 
 
@@ -59,6 +46,4 @@ def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **k
         raise ValueError(
             f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}"
         )
-    return SENTENCE_TRANSFORMERS_TASKS[task](
-        model_dir=model_dir, device=device, **kwargs
-    )
+    return SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir)
diff --git a/tests/unit/test_sentence_transformers.py b/tests/unit/test_sentence_transformers.py
index f8556ed0..6e3cf3f8 100644
--- a/tests/unit/test_sentence_transformers.py
+++ b/tests/unit/test_sentence_transformers.py
@@ -43,19 +43,19 @@ def test_sentence_similarity():
     assert isinstance(res["similarities"], list)
 
 
-@require_torch
-def test_sentence_ranking():
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname)
-        pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix())
-        res = pipe(
-            [
-                ["Lets create an embedding", "Lets create an embedding"],
-                ["Lets create an embedding", "Lets create an embedding"],
-            ]
-        )
-        assert isinstance(res["scores"], list)
-        res = pipe(
-            ["Lets create an embedding", "Lets create an embedding"],
-        )
-        assert isinstance(res["scores"], float)
+#@require_torch
+#def test_sentence_ranking():
+#    with tempfile.TemporaryDirectory() as tmpdirname:
+#        storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname)
+#        pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix())
+#        res = pipe(
+#            [
+#                ["Lets create an embedding", "Lets create an embedding"],
+#                ["Lets create an embedding", "Lets create an embedding"],
+#            ]
+#        )
+#        assert isinstance(res["scores"], list)
+#        res = pipe(
+#            ["Lets create an embedding", "Lets create an embedding"],
+#        )
+#        assert isinstance(res["scores"], float)