diff --git a/docs/reference/api-reference.md b/docs/reference/api-reference.md
index 6c1f8c1d9..31b389a6f 100644
--- a/docs/reference/api-reference.md
+++ b/docs/reference/api-reference.md
@@ -1015,6 +1015,7 @@ client.index({ index })
 ## client.info [_info]
 Get cluster info.
 Get basic build, version, and cluster information.
+IMPORTANT: In Serverless, this API is retained for backward compatibility only. Some response fields, such as the version number, should be ignored.
 
 [Endpoint documentation](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-info)
 
@@ -4624,7 +4625,12 @@ To override the default behavior, you can set the `esql.query.allow_partial_resu
 It is valid only for the CSV format.
 - **`drop_null_columns` (Optional, boolean)**: Indicates whether columns that are entirely `null` will be removed from the `columns` and `values` portion of the results.
 If `true`, the response will include an extra section under the name `all_columns` which has the name of all the columns.
-- **`format` (Optional, Enum("csv" \| "json" \| "tsv" \| "txt" \| "yaml" \| "cbor" \| "smile" \| "arrow"))**: A short version of the Accept header, for example `json` or `yaml`.
+- **`format` (Optional, Enum("csv" \| "json" \| "tsv" \| "txt" \| "yaml" \| "cbor" \| "smile" \| "arrow"))**: A short version of the Accept header, e.g. json, yaml.
+
+`csv`, `tsv`, and `txt` formats will return results in a tabular format, excluding other metadata fields from the response.
+
+For async requests, nothing will be returned if the async query doesn't finish within the timeout.
+The query ID and running status are available in the `X-Elasticsearch-Async-Id` and `X-Elasticsearch-Async-Is-Running` HTTP headers of the response, respectively.
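+
+For example, a minimal sketch of reading those headers with this client (the index name and timeout value are illustrative; passing `meta: true` returns the raw transport result, which exposes the response headers):
+
+```ts
+const result = await client.esql.asyncQuery(
+  { query: 'FROM my-index | LIMIT 10', wait_for_completion_timeout: '2s', format: 'json' },
+  { meta: true }
+)
+const asyncId = result.headers?.['x-elasticsearch-async-id']
+const isRunning = result.headers?.['x-elasticsearch-async-is-running']
+```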
 
 ## client.esql.asyncQueryDelete [_esql.async_query_delete]
 Delete an async ES|QL query.
@@ -4745,6 +4751,8 @@ name and the next level key is the column name.
 object with information about the clusters that participated in the search along with info such as shards count.
 - **`format` (Optional, Enum("csv" \| "json" \| "tsv" \| "txt" \| "yaml" \| "cbor" \| "smile" \| "arrow"))**: A short version of the Accept header, e.g. json, yaml.
+
+`csv`, `tsv`, and `txt` formats will return results in a tabular format, excluding other metadata fields from the response.
 - **`delimiter` (Optional, string)**: The character to use between values within a CSV row.
 Only valid for the CSV format.
 - **`drop_null_columns` (Optional, boolean)**: Should columns that are entirely `null` be removed from the `columns` and `values` portion of the results?
 Defaults to `false`. If `true` then the response will include an extra section under the name `all_columns` which has the name of all columns.
@@ -7612,6 +7620,7 @@ However, if you do not plan to use the inference APIs to use these models or if
 The following integrations are available through the inference API. You can find the available task types next to the integration name:
 * AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)
 * Amazon Bedrock (`completion`, `text_embedding`)
+* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
 * Anthropic (`completion`)
 * Azure AI Studio (`completion`, `text_embedding`)
 * Azure OpenAI (`completion`, `text_embedding`)
@@ -7692,14 +7701,28 @@ These settings are specific to the task type you specified.
 - **`timeout` (Optional, string \| -1 \| 0)**: Specifies the amount of time to wait for the inference endpoint to be created.
 
 ## client.inference.putAmazonsagemaker [_inference.put_amazonsagemaker]
-Configure a Amazon SageMaker inference endpoint
+Create an Amazon SageMaker inference endpoint.
 
-[Endpoint documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-sagemaker.html)
+Create an inference endpoint to perform an inference task with the `amazon_sagemaker` service.
+
+[Endpoint documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker)
 
 ```ts
-client.inference.putAmazonsagemaker()
+client.inference.putAmazonsagemaker({ task_type, amazonsagemaker_inference_id, service, service_settings })
 ```
+
+### Arguments [_arguments_inference.put_amazonsagemaker]
+
+#### Request (object) [_request_inference.put_amazonsagemaker]
+- **`task_type` (Enum("text_embedding" \| "completion" \| "chat_completion" \| "sparse_embedding" \| "rerank"))**: The type of the inference task that the model will perform.
+- **`amazonsagemaker_inference_id` (string)**: The unique identifier of the inference endpoint.
+- **`service` (Enum("amazon_sagemaker"))**: The type of service supported for the specified task type. In this case, `amazon_sagemaker`.
+- **`service_settings` ({ access_key, endpoint_name, api, region, secret_key, target_model, target_container_hostname, inference_component_name, batch_size, dimensions })**: Settings used to install the inference model.
+These settings are specific to the `amazon_sagemaker` service and `service_settings.api` you specified.
+- **`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })**: The chunking configuration object.
+- **`task_settings` (Optional, { custom_attributes, enable_explanations, inference_id, session_id, target_variant })**: Settings to configure the inference task.
+These settings are specific to the task type and `service_settings.api` you specified.
+- **`timeout` (Optional, string \| -1 \| 0)**: Specifies the amount of time to wait for the inference endpoint to be created.
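+
+As an illustration, a request might look like the following (all names and credential values are placeholders; the exact `service_settings` fields depend on your SageMaker deployment and the `api` you choose):
+
+```ts
+await client.inference.putAmazonsagemaker({
+  task_type: 'text_embedding',
+  amazonsagemaker_inference_id: 'my-sagemaker-embeddings',
+  service: 'amazon_sagemaker',
+  service_settings: {
+    access_key: '<aws-access-key>',
+    secret_key: '<aws-secret-key>',
+    region: 'us-east-1',
+    api: 'elastic',
+    endpoint_name: 'my-sagemaker-endpoint'
+  }
+})
+```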
 
 ## client.inference.putAnthropic [_inference.put_anthropic]
 Create an Anthropic inference endpoint.
diff --git a/src/api/api/inference.ts b/src/api/api/inference.ts
index 8cf8cdcad..0755edda7 100644
--- a/src/api/api/inference.ts
+++ b/src/api/api/inference.ts
@@ -139,8 +139,15 @@ export default class Inference {
       'task_type',
       'amazonsagemaker_inference_id'
     ],
-    body: [],
-    query: []
+    body: [
+      'chunking_settings',
+      'service',
+      'service_settings',
+      'task_settings'
+    ],
+    query: [
+      'timeout'
+    ]
   },
   'inference.put_anthropic': {
     path: [
@@ -716,7 +723,7 @@ export default class Inference {
   }
 
   /**
-   * Create an inference endpoint. IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. The following integrations are available through the inference API. You can find the available task types next to the integration name: * AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`) * Amazon Bedrock (`completion`, `text_embedding`) * Anthropic (`completion`) * Azure AI Studio (`completion`, `text_embedding`) * Azure OpenAI (`completion`, `text_embedding`) * Cohere (`completion`, `rerank`, `text_embedding`) * DeepSeek (`completion`, `chat_completion`) * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland) * ELSER (`sparse_embedding`) * Google AI Studio (`completion`, `text_embedding`) * Google Vertex AI (`rerank`, `text_embedding`) * Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`) * Mistral (`chat_completion`, `completion`, `text_embedding`) * OpenAI (`chat_completion`, `completion`, `text_embedding`) * VoyageAI (`text_embedding`, `rerank`) * Watsonx inference integration (`text_embedding`) * JinaAI (`text_embedding`, `rerank`)
+   * Create an inference endpoint. IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. The following integrations are available through the inference API. You can find the available task types next to the integration name: * AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`) * Amazon Bedrock (`completion`, `text_embedding`) * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`) * Anthropic (`completion`) * Azure AI Studio (`completion`, `text_embedding`) * Azure OpenAI (`completion`, `text_embedding`) * Cohere (`completion`, `rerank`, `text_embedding`) * DeepSeek (`completion`, `chat_completion`) * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland) * ELSER (`sparse_embedding`) * Google AI Studio (`completion`, `text_embedding`) * Google Vertex AI (`rerank`, `text_embedding`) * Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`) * Mistral (`chat_completion`, `completion`, `text_embedding`) * OpenAI (`chat_completion`, `completion`, `text_embedding`) * VoyageAI (`text_embedding`, `rerank`) * Watsonx inference integration (`text_embedding`) * JinaAI (`text_embedding`, `rerank`)
    * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put | Elasticsearch API documentation}
    */
   async put (this: That, params: T.InferencePutRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutResponse>
@@ -887,15 +894,17 @@ export default class Inference {
   }
 
   /**
-   * Configure a Amazon SageMaker inference endpoint
-   * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/9.1/infer-service-amazon-sagemaker.html | Elasticsearch API documentation}
+   * Create an Amazon SageMaker inference endpoint. Create an inference endpoint to perform an inference task with the `amazon_sagemaker` service.
+   * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker | Elasticsearch API documentation}
    */
-  async putAmazonsagemaker (this: That, params?: T.TODO, options?: TransportRequestOptionsWithOutMeta): Promise<T.TODO>
-  async putAmazonsagemaker (this: That, params?: T.TODO, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.TODO, unknown>>
-  async putAmazonsagemaker (this: That, params?: T.TODO, options?: TransportRequestOptions): Promise<T.TODO>
-  async putAmazonsagemaker (this: That, params?: T.TODO, options?: TransportRequestOptions): Promise<any> {
+  async putAmazonsagemaker (this: That, params: T.InferencePutAmazonsagemakerRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutAmazonsagemakerResponse>
+  async putAmazonsagemaker (this: That, params: T.InferencePutAmazonsagemakerRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutAmazonsagemakerResponse, unknown>>
+  async putAmazonsagemaker (this: That, params: T.InferencePutAmazonsagemakerRequest, options?: TransportRequestOptions): Promise<T.InferencePutAmazonsagemakerResponse>
+  async putAmazonsagemaker (this: That, params: T.InferencePutAmazonsagemakerRequest, options?: TransportRequestOptions): Promise<any> {
     const {
-      path: acceptedPath
+      path: acceptedPath,
+      body: acceptedBody,
+      query: acceptedQuery
     } = this.acceptedParams['inference.put_amazonsagemaker']
 
     const userQuery = params?.querystring
@@ -911,12 +920,22 @@ export default class Inference {
       }
     }
 
-    params = params ?? {}
     for (const key in params) {
-      if (acceptedPath.includes(key)) {
+      if (acceptedBody.includes(key)) {
+        body = body ?? {}
+        // @ts-expect-error
+        body[key] = params[key]
+      } else if (acceptedPath.includes(key)) {
         continue
       } else if (key !== 'body' && key !== 'querystring') {
-        querystring[key] = params[key]
+        if (acceptedQuery.includes(key) || commonQueryParams.includes(key)) {
+          // @ts-expect-error
+          querystring[key] = params[key]
+        } else {
+          body = body ?? {}
+          // @ts-expect-error
+          body[key] = params[key]
+        }
       }
     }
diff --git a/src/api/api/info.ts b/src/api/api/info.ts
index 536fabb6c..d490a4fac 100644
--- a/src/api/api/info.ts
+++ b/src/api/api/info.ts
@@ -35,7 +35,7 @@ const acceptedParams: Record<string, { path: string[], body: string[], query: string[] }> = {
   }
 }
 
 /**
-  * Get cluster info. Get basic build, version, and cluster information.
+  * Get cluster info. Get basic build, version, and cluster information. IMPORTANT: In Serverless, this API is retained for backward compatibility only. Some response fields, such as the version number, should be ignored.
   * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-info | Elasticsearch API documentation}
   */
diff --git a/src/api/types.ts b/src/api/types.ts
index 61265d6de..0f48a5279 100644
--- a/src/api/types.ts
+++ b/src/api/types.ts
@@ -3848,7 +3848,10 @@ export interface ElasticsearchVersionInfo {
   /** The minimum node version with which the responding node can communicate.
    * Also the minimum version from which you can perform a rolling upgrade. */
   minimum_wire_compatibility_version: VersionString
-  /** The Elasticsearch version number. */
+  /** The Elasticsearch version number.
+   *
+   * IMPORTANT: For Serverless deployments, this static value is always `8.11.0` and is used solely for backward compatibility with legacy clients.
+   * Serverless environments are versionless and automatically upgraded, so this value can be safely ignored. */
   number: string
 }
 
@@ -17916,7 +17919,13 @@ export type EqlSearchResponse<TEvent = unknown> = EqlEqlSearchResponseBase<TEvent>