Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@

### ✨ New Functionality

-

- [Orchestration] Added embedding generation support with new `OrchestrationClient#embed()` methods.
- Added `OrchestrationEmbeddingModel` with `TEXT_EMBEDDING_3_SMALL`, `TEXT_EMBEDDING_3_LARGE`, `AMAZON_TITAN_EMBED_TEXT` and `NVIDIA_LLAMA_32_NV_EMBEDQA_1B` embedding models.
- Introduced `OrchestrationEmbeddingRequest` for building requests fluently and `OrchestrationEmbeddingResponse#getEmbeddingVectors()` to retrieve embeddings.

### 📈 Improvements

-
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,15 +226,34 @@ public Stream<OrchestrationChatCompletionDelta> streamChatCompletionDeltas(
}

/**
* Generate embeddings for the given request.
* Generate embeddings for a {@code OrchestrationEmbeddingRequest} request.
*
* @param request the request containing the input text and other parameters.
* @return the response containing the embeddings.
* @throws OrchestrationClientException if the request fails
* @since 1.9.0
* @since 1.12.0
*/
@Nonnull
EmbeddingsPostResponse embed(@Nonnull final EmbeddingsPostRequest request)
public OrchestrationEmbeddingResponse embed(@Nonnull final OrchestrationEmbeddingRequest request)
throws OrchestrationClientException {
final var response = embed(request.createEmbeddingsPostRequest());
return new OrchestrationEmbeddingResponse(response);
}

/**
 * Executes a low-level embeddings request against the {@code /v2/embeddings} endpoint.
 *
 * <p>This overload exposes the underlying API model directly and is meant for advanced use
 * cases. In most scenarios {@link #embed(OrchestrationEmbeddingRequest)} is the better choice.
 *
 * @param request the low-level API request
 * @return the low-level response object
 * @throws OrchestrationClientException if the request fails
 * @since 1.12.0
 * @see #embed(OrchestrationEmbeddingRequest)
 */
@Nonnull
public EmbeddingsPostResponse embed(@Nonnull final EmbeddingsPostRequest request)
    throws OrchestrationClientException {
  final EmbeddingsPostResponse embeddingsResponse =
      executor.execute("/v2/embeddings", request, EmbeddingsPostResponse.class, customHeaders);
  return embeddingsResponse;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package com.sap.ai.sdk.orchestration;

import com.google.common.annotations.Beta;
import com.sap.ai.sdk.core.AiModel;
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelDetails;
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelParams;
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelParams.EncodingFormatEnum;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Value;
import lombok.With;
import lombok.experimental.Accessors;

/**
* Configuration for embedding models in the Orchestration service.
*
* <p>Holds the model name plus the optional version, output dimensions and normalization flag.
* Predefined constants cover the supported models; the Lombok {@code @With} annotation generates
* copy-style modifiers for the individual fields.
*
* @since 1.12.0
*/
@Beta
@With
@Value
@Accessors(fluent = true)
@AllArgsConstructor(access = AccessLevel.PRIVATE)
public class OrchestrationEmbeddingModel implements AiModel {
/** The name of the embedding model. */
@Nonnull String name;

/** The version of the model, defaults to latest if not specified. */
@Nullable String version;

/** The number of dimensions for the output embeddings, or {@code null} if not set. */
@Nullable Integer dimensions;

/** Whether to normalize the embedding vectors, or {@code null} if not set. */
@Nullable Boolean normalize;

/** Azure OpenAI Text Embedding 3 Small model. */
public static final OrchestrationEmbeddingModel TEXT_EMBEDDING_3_SMALL =
new OrchestrationEmbeddingModel("text-embedding-3-small");

/** Azure OpenAI Text Embedding 3 Large model. */
public static final OrchestrationEmbeddingModel TEXT_EMBEDDING_3_LARGE =
new OrchestrationEmbeddingModel("text-embedding-3-large");

/** Amazon Titan Embed Text model. */
public static final OrchestrationEmbeddingModel AMAZON_TITAN_EMBED_TEXT =
new OrchestrationEmbeddingModel("amazon--titan-embed-text");

/** NVIDIA LLaMA 3.2 1B NV EmbedQA model. */
public static final OrchestrationEmbeddingModel NVIDIA_LLAMA_32_NV_EMBEDQA_1B =
new OrchestrationEmbeddingModel("nvidia--llama-3.2-nv-embedqa-1b");

/**
* Creates a new embedding model configuration with the specified name.
*
* <p>Version, dimensions and normalize are left unset ({@code null}).
*
* @param name the model name
*/
public OrchestrationEmbeddingModel(@Nonnull final String name) {
this(name, null, null, null);
}

/**
* Converts this configuration into the low-level {@link EmbeddingsModelDetails}.
*
* <p>{@code dimensions} and {@code normalize} are passed through unchanged (possibly {@code
* null}); the encoding format is always set to {@link EncodingFormatEnum#FLOAT}.
*
* @return the model details carrying name, version and parameters of this configuration
*/
@Nonnull
EmbeddingsModelDetails createEmbeddingsModelDetails() {
final var params =
EmbeddingsModelParams.create()
.dimensions(dimensions)
.normalize(normalize)
.encodingFormat(EncodingFormatEnum.FLOAT);
return EmbeddingsModelDetails.create().name(name).version(version).params(params);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package com.sap.ai.sdk.orchestration;

import static lombok.AccessLevel.NONE;
import static lombok.AccessLevel.PRIVATE;

import com.google.common.annotations.Beta;
import com.google.common.collect.Lists;
import com.sap.ai.sdk.orchestration.model.EmbeddingsInput;
import com.sap.ai.sdk.orchestration.model.EmbeddingsInputText;
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelConfig;
import com.sap.ai.sdk.orchestration.model.EmbeddingsModuleConfigs;
import com.sap.ai.sdk.orchestration.model.EmbeddingsOrchestrationConfig;
import com.sap.ai.sdk.orchestration.model.EmbeddingsPostRequest;
import com.sap.ai.sdk.orchestration.model.MaskingModuleConfigProviders;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Value;
import lombok.With;
import lombok.experimental.Tolerate;

/**
* Represents a request for generating embeddings through the SAP AI Core Orchestration service.
*
* @since 1.12.0
*/
@Beta
@Value
@AllArgsConstructor(access = PRIVATE)
public class OrchestrationEmbeddingRequest {

/** The embedding model to use for generating vector representations. */
@Nonnull OrchestrationEmbeddingModel model;

/** The list of text inputs to be converted into embeddings. */
@Nonnull List<String> inputs;

/** Optional masking providers for data privacy and security. */
@With(value = PRIVATE)
@Nullable
List<MaskingProvider> masking;

/** Optional embedding input type classification to optimize embedding generation. */
@With(value = PRIVATE)
@Getter(NONE)
@Nullable
EmbeddingsInput.TypeEnum inputType;

/**
 * Entry point of the fluent API: fixes the model and returns a step that expects the inputs.
 *
 * <pre>{@code
 * OrchestrationEmbeddingRequest.forModel(myModel).forInputs("text to embed");
 * }</pre>
 *
 * @param model the embedding model to use
 * @return a step for specifying inputs
 */
@Nonnull
public static InputStep forModel(@Nonnull final OrchestrationEmbeddingModel model) {
  return texts -> {
    // Take an unmodifiable snapshot so later changes to the caller's list do not leak in.
    final List<String> snapshot = List.copyOf(texts);
    return new OrchestrationEmbeddingRequest(model, snapshot, null, null);
  };
}

/** Builder step for specifying text inputs to embed. */
@FunctionalInterface
public interface InputStep {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Minor)

I'm not sure whether we used "Step" as "Builder" substitute already somewhere in the project. If not, please reconsider.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have in fact used it once before in TemplateConfig. I actually like the current approach in how clean it is in usage and enforces both required arguments. But again, if you have a strong opinion, I will choose a static factory. Please confirm.


/**
* Specifies the text inputs to be embedded and produces the request.
*
* @param inputs the text strings to embed
* @return a new embedding request instance
*/
@Nonnull
OrchestrationEmbeddingRequest forInputs(@Nonnull final List<String> inputs);

/**
 * Convenience overload that accepts the inputs as individual strings; at least one is required.
 *
 * @param firstInput the first string to embed
 * @param inputs optional additional strings to embed
 * @return a new embedding request instance
 */
@Nonnull
default OrchestrationEmbeddingRequest forInputs(
    @Nonnull final String firstInput, @Nonnull final String... inputs) {
  final List<String> allInputs = Lists.asList(firstInput, inputs);
  return forInputs(allInputs);
}
}

/**
 * Adds data masking providers to enable detection and masking of sensitive information.
 *
 * <p>The providers are copied into an unmodifiable list, so later modifications of the passed
 * varargs array do not affect the returned request.
 *
 * @param maskingProvider the primary masking provider
 * @param maskingProviders additional masking providers
 * @return a request instance with the specified masking providers
 * @throws NullPointerException if any provider is {@code null}
 * @see MaskingProvider
 */
@Tolerate
@Nonnull
public OrchestrationEmbeddingRequest withMasking(
    @Nonnull final MaskingProvider maskingProvider,
    @Nonnull final MaskingProvider... maskingProviders) {
  // Lists.asList only wraps the varargs array; copy it so this immutable request does not
  // share state with the caller's array.
  final List<MaskingProvider> providers =
      List.copyOf(Lists.asList(maskingProvider, maskingProviders));
  return withMasking(providers);
}

/**
 * Marks the inputs of this request as document content.
 *
 * @return a request instance whose input type is {@code DOCUMENT}
 */
@Nonnull
public OrchestrationEmbeddingRequest asDocument() {
  final EmbeddingsInput.TypeEnum documentType = EmbeddingsInput.TypeEnum.DOCUMENT;
  return withInputType(documentType);
}

/**
 * Marks the inputs of this request as general text content.
 *
 * @return a request instance whose input type is {@code TEXT}
 */
@Nonnull
public OrchestrationEmbeddingRequest asText() {
  final EmbeddingsInput.TypeEnum textType = EmbeddingsInput.TypeEnum.TEXT;
  return withInputType(textType);
}

/**
 * Marks the inputs of this request as query content.
 *
 * @return a request instance whose input type is {@code QUERY}
 */
@Nonnull
public OrchestrationEmbeddingRequest asQuery() {
  final EmbeddingsInput.TypeEnum queryType = EmbeddingsInput.TypeEnum.QUERY;
  return withInputType(queryType);
}

/**
 * Translates this request into the low-level {@link EmbeddingsPostRequest}.
 *
 * <p>The embeddings module is always configured from the model; the masking module is only
 * attached when masking providers are present.
 *
 * @return the low-level request carrying the module configuration and the input
 */
@Nonnull
EmbeddingsPostRequest createEmbeddingsPostRequest() {
  final var moduleConfigs =
      EmbeddingsModuleConfigs.create()
          .embeddings(EmbeddingsModelConfig.create().model(model.createEmbeddingsModelDetails()));

  // Masking is optional: leave the module untouched unless providers were configured.
  if (masking != null) {
    final var providerConfigs = masking.stream().map(MaskingProvider::createConfig).toList();
    moduleConfigs.setMasking(MaskingModuleConfigProviders.create().providers(providerConfigs));
  }

  final var embeddingsInput =
      EmbeddingsInput.create().text(EmbeddingsInputText.create(inputs)).type(inputType);
  final var orchestrationConfig = EmbeddingsOrchestrationConfig.create().modules(moduleConfigs);
  return EmbeddingsPostRequest.create().config(orchestrationConfig).input(embeddingsInput);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package com.sap.ai.sdk.orchestration;

import static lombok.AccessLevel.PACKAGE;

import com.google.common.annotations.Beta;
import com.sap.ai.sdk.orchestration.model.Embedding;
import com.sap.ai.sdk.orchestration.model.EmbeddingsPostResponse;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nonnull;
import lombok.AllArgsConstructor;
import lombok.Value;

/**
* Response wrapper for orchestration embedding operations.
*
* <p>Wraps {@link EmbeddingsPostResponse} and provides convenient access to embedding vectors.
*
* @since 1.12.0
*/
@Beta
@Value
@AllArgsConstructor(access = PACKAGE)
public class OrchestrationEmbeddingResponse {

/** The original embedding response from the orchestration API. */
@Nonnull EmbeddingsPostResponse originalResponse;

/**
* Extracts embedding vectors as float arrays.
*
* @return list of embedding vectors, never {@code null}
*/
@Nonnull
public List<float[]> getEmbeddingVectors() {
final var embeddings = new ArrayList<float[]>();
for (final var container : originalResponse.getFinalResult().getData()) {
final var bigDecimals = (Embedding.InnerBigDecimals) container.getEmbedding();
final var values = bigDecimals.values();
final float[] arr = new float[values.size()];
for (int i = 0; i < values.size(); i++) {
arr[i] = values.get(i).floatValue();
Comment on lines +38 to +42
Copy link
Member Author

@rpanackal rpanackal Sep 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an unfortunate consequence of the openapi generator flag <useFloatArrays>true</useFloatArrays> not being supported for

oneOf:                        # works without the `oneOf`
  - type: array
     items:
       type: integer

Copy link
Contributor

@newtork newtork Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is concerning:

See memory footprint comparison

Memory Usage Breakdown

float[] array:

  • Each float uses exactly 4 bytes
  • Array overhead: ~12-16 bytes (object header)
  • Total for 1000 elements: ~4,012 bytes

List<BigDecimal> (assuming ArrayList):

  • ArrayList overhead: ~24 bytes + internal array overhead
  • Each BigDecimal object: ~40-48 bytes (object header + BigInteger + scale + precision)
  • Each BigInteger inside: ~24-32 bytes + int array for digits
  • Boxing overhead from list storage
  • Total for 1000 elements: ~80,000-100,000 bytes

  • Is this limitation originating from the openapi generator or our creators feature?
  • Do you think it's possible to fix it in our creators feature?
  • Is a follow-up BLI already considered?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • openapi generator does not support this at all
  • our creator feature doesn't account for the combined use of USE_FLOAT_ARRAY feature.

PR on the way for fixing our creator feature. 😄

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OpenAPI Generator PR: SAP/cloud-sdk-java#927

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the above PR + Cloud SDK release a requirement? No, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a requirement. Just a nice to have.

}
embeddings.add(arr);
}
return embeddings;
}
}
Loading