diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py index 84d1f3d4d..02ab3f85d 100644 --- a/elasticsearch/_async/client/__init__.py +++ b/elasticsearch/_async/client/__init__.py @@ -637,6 +637,8 @@ async def bulk( Imagine a _bulk?refresh=wait_for request with three documents in it that happen to be routed to different shards in an index with five shards. The request will only wait for those three shards to refresh. The other two shards that make up the index do not participate in the _bulk request at all.

+

You might want to disable the refresh interval temporarily to improve indexing throughput for large bulk requests. + Refer to the linked documentation for step-by-step instructions using the index settings API.
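The pattern described above can be scripted with the async client. A minimal sketch, assuming a hypothetical `AsyncElasticsearch` instance named `client`, an index called `my-index`, and a list of documents to load:

    from elasticsearch import AsyncElasticsearch

    async def bulk_load(client: AsyncElasticsearch, docs: list[dict]) -> None:
        # Temporarily disable the refresh interval to speed up a large bulk load.
        await client.indices.put_settings(
            index="my-index", settings={"index": {"refresh_interval": "-1"}}
        )
        try:
            # Interleave action metadata and document sources, as the bulk API expects.
            operations = []
            for doc in docs:
                operations.append({"index": {"_index": "my-index"}})
                operations.append(doc)
            await client.bulk(operations=operations)
        finally:
            # Restore the default refresh interval once the load is done.
            await client.indices.put_settings(
                index="my-index", settings={"index": {"refresh_interval": None}}
            )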

``_ @@ -1645,7 +1647,7 @@ async def delete_by_query( async def delete_by_query_rethrottle( self, *, - task_id: t.Union[int, str], + task_id: str, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, @@ -2324,7 +2326,7 @@ async def get( :param index: The name of the index that contains the document. :param id: A unique document identifier. :param force_synthetic_source: Indicates whether the request forces synthetic - `_source`. Use this paramater to test if the mapping supports synthetic `_source` + `_source`. Use this parameter to test if the mapping supports synthetic `_source` and to get a sense of the worst case performance. Fetches with this parameter enabled will be slower than enabling synthetic source natively in the index. :param preference: The node or shard the operation should be performed on. By @@ -2355,8 +2357,8 @@ async def get( :param stored_fields: A comma-separated list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. If this field is specified, the `_source` parameter defaults to - `false`. Only leaf fields can be retrieved with the `stored_field` option. - Object fields can't be returned;​if specified, the request fails. + `false`. Only leaf fields can be retrieved with the `stored_fields` option. + Object fields can't be returned; if specified, the request fails. :param version: The version number for concurrency control. It must match the current version of the document for the request to succeed. :param version_type: The version type. @@ -3586,8 +3588,7 @@ async def open_point_in_time( :param expand_wildcards: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated - values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, - `hidden`, `none`. + values, such as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param index_filter: Filter indices if the provided query rewrites to `match_none` @@ -3887,110 +3888,7 @@ async def reindex( In this case, the response includes a count of the version conflicts that were encountered. Note that the handling of other error types is unaffected by the conflicts property. Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than max_docs until it has successfully indexed max_docs documents into the target or it has gone through every document in the source query.

-

NOTE: The reindex API makes no effort to handle ID collisions. - The last document written will "win" but the order isn't usually predictable so it is not a good idea to rely on this behavior. - Instead, make sure that IDs are unique by using a script.

-

Running reindex asynchronously

-

If the request contains wait_for_completion=false, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task. - Elasticsearch creates a record of this task as a document at _tasks/<task_id>.

-

Reindex from multiple sources

-

If you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources. - That way you can resume the process if there are any errors by removing the partially completed source and starting over. - It also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.

-

For example, you can use a bash script like this:

-
for index in i1 i2 i3 i4 i5; do
-            curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{
-              "source": {
-                "index": "'$index'"
-              },
-              "dest": {
-                "index": "'$index'-reindexed"
-              }
-            }'
-          done
-          
-

Throttling

-

Set requests_per_second to any positive decimal number (1.4, 6, 1000, for example) to throttle the rate at which reindex issues batches of index operations. - Requests are throttled by padding each batch with a wait time. - To turn off throttling, set requests_per_second to -1.

-

The throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding. - The padding time is the difference between the batch size divided by the requests_per_second and the time spent writing. - By default the batch size is 1000, so if requests_per_second is set to 500:

-
target_time = 1000 / 500 per second = 2 seconds
-          wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
-          
-

Since the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set. - This is "bursty" instead of "smooth".

-

Slicing

-

Reindex supports sliced scroll to parallelize the reindexing process. - This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.

-

NOTE: Reindexing from remote clusters does not support manual or automatic slicing.

-

You can slice a reindex request manually by providing a slice ID and total number of slices to each request. - You can also let reindex automatically parallelize by using sliced scroll to slice on _id. - The slices parameter specifies the number of slices to use.

-

Adding slices to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:

- -

If slicing automatically, setting slices to auto will choose a reasonable number for most indices. - If slicing manually or otherwise tuning automatic slicing, use the following guidelines.

-

Query performance is most efficient when the number of slices is equal to the number of shards in the index. - If that number is large (for example, 500), choose a lower number as too many slices will hurt performance. - Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.

-

Indexing performance scales linearly across available resources with the number of slices.

-

Whether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.

-

Modify documents during reindexing

-

Like _update_by_query, reindex operations support a script that modifies the document. - Unlike _update_by_query, the script is allowed to modify the document's metadata.

-

Just as in _update_by_query, you can set ctx.op to change the operation that is run on the destination. - For example, set ctx.op to noop if your script decides that the document doesn’t have to be indexed in the destination. This "no operation" will be reported in the noop counter in the response body. - Set ctx.op to delete if your script decides that the document must be deleted from the destination. - The deletion will be reported in the deleted counter in the response body. - Setting ctx.op to anything else will return an error, as will setting any other field in ctx.

-

Think of the possibilities! Just be careful; you are able to change:

- -

Setting _version to null or clearing it from the ctx map is just like not sending the version in an indexing request. - It will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.

-

Reindex from remote

-

Reindex supports reindexing from a remote Elasticsearch cluster. - The host parameter must contain a scheme, host, port, and optional path. - The username and password parameters are optional and when they are present the reindex operation will connect to the remote Elasticsearch node using basic authentication. - Be sure to use HTTPS when using basic authentication or the password will be sent in plain text. - There are a range of settings available to configure the behavior of the HTTPS connection.

-

When using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key. - Remote hosts must be explicitly allowed with the reindex.remote.whitelist setting. - It can be set to a comma delimited list of allowed remote host and port combinations. - Scheme is ignored; only the host and port are used. - For example:

-
reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*"]
-          
-

The list of allowed hosts must be configured on any nodes that will coordinate the reindex. - This feature should work with remote clusters of any version of Elasticsearch. - This should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.

-

WARNING: Elasticsearch does not support forward compatibility across major versions. - For example, you cannot reindex from a 7.x cluster into a 6.x cluster.

-

To enable queries sent to older versions of Elasticsearch, the query parameter is sent directly to the remote host without validation or modification.

-

NOTE: Reindexing from remote clusters does not support manual or automatic slicing.

-

Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100mb. - If the remote index includes very large documents you'll need to use a smaller batch size. - It is also possible to set the socket read timeout on the remote connection with the socket_timeout field and the connection timeout with the connect_timeout field. - Both default to 30 seconds.

-

Configuring SSL parameters

-

Reindex from remote supports configurable SSL settings. - These must be specified in the elasticsearch.yml file, with the exception of the secure settings, which you add in the Elasticsearch keystore. - It is not possible to configure SSL in the body of the reindex request.

+

Refer to the linked documentation for examples of how to reindex documents.
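With the Python client, a reindex launched with `wait_for_completion=False` returns a task ID that can be polled. A minimal sketch, assuming hypothetical index names and an `AsyncElasticsearch` instance named `client`:

    from elasticsearch import AsyncElasticsearch

    async def reindex_async(client: AsyncElasticsearch) -> dict:
        # Launch the reindex as a task and throttle it to 500 requests per second.
        resp = await client.reindex(
            source={"index": "my-index"},
            dest={"index": "my-index-reindexed"},
            wait_for_completion=False,
            requests_per_second=500,
            slices="auto",
        )
        task_id = resp["task"]
        # Poll the task API for progress; the record lives at _tasks/<task_id>.
        return await client.tasks.get(task_id=task_id)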

``_ @@ -4652,11 +4550,11 @@ async def search( of the specified nodes are available, select shards from any available node using the default method. * `_prefer_nodes:,` to if possible, run the search on the specified nodes IDs. If not, select shards using the - default method. `_shards:,` to run the search only on the specified - shards. You can combine this value with other `preference` values. However, - the `_shards` value must come first. For example: `_shards:2,3|_local`. `` - (any string that does not start with `_`) to route searches with the same - `` to the same shards in the same order. + default method. * `_shards:,` to run the search only on the + specified shards. You can combine this value with other `preference` values. + However, the `_shards` value must come first. For example: `_shards:2,3|_local`. + * `` (any string that does not start with `_`) to route searches + with the same `` to the same shards in the same order. :param profile: Set to `true` to return detailed timing information about the execution of individual components in a search request. NOTE: This is a debugging tool and adds significant overhead to search execution. @@ -4992,51 +4890,6 @@ async def search_mvt(
  • Optionally, a geo_bounds aggregation on the <field>. The search only includes this aggregation if the exact_bounds parameter is true.
  • If the optional parameter with_labels is true, the internal search will include a dynamic runtime field that calls the getLabelPosition function of the geometry doc value. This enables the generation of new point features containing suggested geometry labels, so that, for example, multi-polygons will have only one label.
  • -

    For example, Elasticsearch may translate a vector tile search API request with a grid_agg argument of geotile and an exact_bounds argument of true into the following search

    -
    GET my-index/_search
    -          {
    -            "size": 10000,
    -            "query": {
    -              "geo_bounding_box": {
    -                "my-geo-field": {
    -                  "top_left": {
    -                    "lat": -40.979898069620134,
    -                    "lon": -45
    -                  },
    -                  "bottom_right": {
    -                    "lat": -66.51326044311186,
    -                    "lon": 0
    -                  }
    -                }
    -              }
    -            },
    -            "aggregations": {
    -              "grid": {
    -                "geotile_grid": {
    -                  "field": "my-geo-field",
    -                  "precision": 11,
    -                  "size": 65536,
    -                  "bounds": {
    -                    "top_left": {
    -                      "lat": -40.979898069620134,
    -                      "lon": -45
    -                    },
    -                    "bottom_right": {
    -                      "lat": -66.51326044311186,
    -                      "lon": 0
    -                    }
    -                  }
    -                }
    -              },
    -              "bounds": {
    -                "geo_bounds": {
    -                  "field": "my-geo-field",
    -                  "wrap_longitude": false
    -                }
    -              }
    -            }
    -          }
    -          

    The API returns results as a binary Mapbox vector tile. Mapbox vector tiles are encoded as Google Protobufs (PBF). By default, the tile contains three layers:

      @@ -5291,6 +5144,7 @@ async def search_mvt( Some cells may intersect more than one vector tile. To compute the H3 resolution for each precision, Elasticsearch compares the average density of hexagonal bins at each resolution with the average density of tile bins at each zoom level. Elasticsearch uses the H3 resolution that is closest to the corresponding geotile density.

      +

      Learn how to use the vector tile search API with practical examples in the Vector tile search examples guide.
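A minimal sketch of requesting a single vector tile from the async client; the index, geospatial field, and tile coordinates are hypothetical:

    from elasticsearch import AsyncElasticsearch

    async def fetch_tile(client: AsyncElasticsearch) -> bytes:
        # The response is a binary Mapbox vector tile (PBF), not JSON.
        resp = await client.search_mvt(
            index="my-index",
            field="my-geo-field",
            zoom=7,
            x=37,
            y=48,
            grid_agg="geotile",
            exact_bounds=True,
        )
        return resp.body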

      ``_ @@ -5480,7 +5334,7 @@ async def search_shards( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param local: If `true`, the request retrieves information from the local node @@ -5592,8 +5446,7 @@ async def search_template( :param expand_wildcards: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated - values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, - `hidden`, `none`. + values, such as `open,hidden`. :param explain: If `true`, returns detailed information about score calculation as part of each hit. If you specify both this and the `explain` query parameter, the API uses only the query parameter. @@ -5867,7 +5720,8 @@ async def termvectors( The information is only retrieved for the shard the requested document resides in. The term and field statistics are therefore only useful as relative measures whereas the absolute numbers have no meaning in this context. By default, when requesting term vectors of artificial documents, a shard to get the statistics from is randomly selected. - Use routing only to hit a particular shard.

      + Use routing only to hit a particular shard. + Refer to the linked documentation for detailed examples of how to use this API.
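A minimal sketch of requesting term vectors for an artificial document while pinning the statistics to one shard via `routing` (the index, field, and routing value are hypothetical):

    from elasticsearch import AsyncElasticsearch

    async def artificial_termvectors(client: AsyncElasticsearch) -> dict:
        # Term and field statistics come from the shard selected by `routing`.
        return await client.termvectors(
            index="my-index",
            doc={"text": "quick brown fox"},
            fields=["text"],
            routing="user-1",
            term_statistics=True,
        )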

      ``_ @@ -6038,7 +5892,8 @@ async def update(

    The document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation.

    The _source field must be enabled to use this API. - In addition to _source, you can access the following variables through the ctx map: _index, _type, _id, _version, _routing, and _now (the current timestamp).

    + In addition to _source, you can access the following variables through the ctx map: _index, _type, _id, _version, _routing, and _now (the current timestamp). + For usage examples such as partial updates, upserts, and scripted updates, see the External documentation.
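A minimal sketch of a partial update, an upsert, and a scripted update with the async client (index, IDs, and field names are hypothetical):

    from elasticsearch import AsyncElasticsearch

    async def update_examples(client: AsyncElasticsearch) -> None:
        # Partial update: merge the given fields into the existing document.
        await client.update(index="my-index", id="1", doc={"status": "published"})
        # Upsert: treat `doc` as the full document if "2" does not exist yet.
        await client.update(index="my-index", id="2", doc={"views": 0}, doc_as_upsert=True)
        # Scripted update: increment a counter, creating the document if needed.
        await client.update(
            index="my-index",
            id="3",
            script={"source": "ctx._source.views += params.n", "params": {"n": 1}},
            upsert={"views": 1},
        )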

    ``_ @@ -6231,6 +6086,24 @@ async def update_by_query( A bulk update request is performed for each batch of matching documents. Any query or update failures cause the update by query request to fail and the failures are shown in the response. Any update requests that completed successfully still stick, they are not rolled back.

    +

    Refreshing shards

    +

    Specifying the refresh parameter refreshes all shards once the request completes. + This is different to the update API's refresh parameter, which causes only the shard + that received the request to be refreshed. Unlike the update API, it does not support + wait_for.

    +

    Running update by query asynchronously

    +

    If the request contains wait_for_completion=false, Elasticsearch + performs some preflight checks, launches the request, and returns a + task you can use to cancel or get the status of the task. + Elasticsearch creates a record of this task as a document at .tasks/task/${taskId}.

    +

    Waiting for active shards

    +

    wait_for_active_shards controls how many copies of a shard must be active + before proceeding with the request. See wait_for_active_shards + for details. timeout controls how long each write request waits for unavailable + shards to become available. Both work exactly the way they work in the + Bulk API. Update by query uses scrolled searches, so you can also + specify the scroll parameter to control how long it keeps the search context + alive, for example ?scroll=10m. The default is 5 minutes.

    Throttling update requests

    To control the rate at which update by query issues batches of update operations, you can set requests_per_second to any positive decimal number. This pads each batch with a wait time to throttle the rate. @@ -6265,18 +6138,8 @@ async def update_by_query(

  • Query performance is most efficient when the number of slices is equal to the number of shards in the index or backing index. If that number is large (for example, 500), choose a lower number as too many slices hurts performance. Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.
  • Update performance scales linearly across available resources with the number of slices.
  • -

    Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources.

    -

    Update the document source

    -

    Update by query supports scripts to update the document source. - As with the update API, you can set ctx.op to change the operation that is performed.

    -

    Set ctx.op = "noop" if your script decides that it doesn't have to make any changes. - The update by query operation skips updating the document and increments the noop counter.

    -

    Set ctx.op = "delete" if your script decides that the document should be deleted. - The update by query operation deletes the document and increments the deleted counter.

    -

    Update by query supports only index, noop, and delete. - Setting ctx.op to anything else is an error. - Setting any other field in ctx is an error. - This API enables you to only modify the source of matching documents; you cannot move them.

    +

Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources. + Refer to the linked documentation for examples of how to update documents using the _update_by_query API.
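A minimal sketch that combines the points above with the async client: a scripted update by query that proceeds past version conflicts, is throttled, sliced automatically, and runs as a task (index, query, and script are hypothetical):

    from elasticsearch import AsyncElasticsearch

    async def update_stale_docs(client: AsyncElasticsearch) -> dict:
        resp = await client.update_by_query(
            index="my-index",
            query={"term": {"status": "stale"}},
            script={"source": "ctx._source.status = 'fresh'"},
            conflicts="proceed",          # count version conflicts instead of failing
            requests_per_second=100,      # throttle the batches of update operations
            slices="auto",                # let Elasticsearch pick the slice count
            wait_for_completion=False,    # run as a task instead of blocking
        )
        # The task record is stored at .tasks/task/${taskId}.
        return await client.tasks.get(task_id=resp["task"])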

    ``_ @@ -6304,8 +6167,7 @@ async def update_by_query( :param expand_wildcards: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated - values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, - `hidden`, `none`. + values, such as `open,hidden`. :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. diff --git a/elasticsearch/_async/client/cat.py b/elasticsearch/_async/client/cat.py index 299ee83ac..960f2f721 100644 --- a/elasticsearch/_async/client/cat.py +++ b/elasticsearch/_async/client/cat.py @@ -1767,7 +1767,200 @@ async def nodes( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, full_id: t.Optional[t.Union[bool, str]] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "build", + "completion.size", + "cpu", + "disk.avail", + "disk.total", + "disk.used", + "disk.used_percent", + "fielddata.evictions", + "fielddata.memory_size", + "file_desc.current", + "file_desc.max", + "file_desc.percent", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "heap.current", + "heap.max", + "heap.percent", + "http_address", + "id", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "jdk", + "load_15m", + "load_1m", + "load_5m", + "mappings.total_count", + "mappings.total_estimated_overhead_in_bytes", + "master", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "name", + "node.role", + "pid", + "port", + "query_cache.evictions", + "query_cache.hit_count", + "query_cache.memory_size", + "query_cache.miss_count", + "ram.current", + "ram.max", + "ram.percent", + "refresh.time", + "refresh.total", + "request_cache.evictions", + "request_cache.hit_count", + "request_cache.memory_size", + "request_cache.miss_count", + "script.cache_evictions", + "script.compilations", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "shard_stats.total_count", + "suggest.current", + "suggest.time", + "suggest.total", + "uptime", + "version", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "build", + "completion.size", + "cpu", + "disk.avail", + "disk.total", + "disk.used", + "disk.used_percent", + "fielddata.evictions", + "fielddata.memory_size", + "file_desc.current", + "file_desc.max", + "file_desc.percent", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "heap.current", + "heap.max", + "heap.percent", + "http_address", + "id", + 
"indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "jdk", + "load_15m", + "load_1m", + "load_5m", + "mappings.total_count", + "mappings.total_estimated_overhead_in_bytes", + "master", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "name", + "node.role", + "pid", + "port", + "query_cache.evictions", + "query_cache.hit_count", + "query_cache.memory_size", + "query_cache.miss_count", + "ram.current", + "ram.max", + "ram.percent", + "refresh.time", + "refresh.total", + "request_cache.evictions", + "request_cache.hit_count", + "request_cache.memory_size", + "request_cache.miss_count", + "script.cache_evictions", + "script.compilations", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "shard_stats.total_count", + "suggest.current", + "suggest.time", + "suggest.total", + "uptime", + "version", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, include_unloaded_segments: t.Optional[bool] = None, @@ -1794,16 +1987,17 @@ async def nodes( to `text`, `json`, `cbor`, `yaml`, or `smile`. :param full_id: If `true`, return the full node ID. If `false`, return the shortened node ID. - :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. :param include_unloaded_segments: If true, the response includes information from segments that are not loaded into memory. - :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. - :param time: Unit used to display time values. + :param master_timeout: The period to wait for a connection to the master node. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. + :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. 
""" __path_parts: t.Dict[str, str] = {} @@ -2022,7 +2216,74 @@ async def recovery( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "bytes", + "bytes_percent", + "bytes_recovered", + "bytes_total", + "files", + "files_percent", + "files_recovered", + "files_total", + "index", + "repository", + "shard", + "snapshot", + "source_host", + "source_node", + "stage", + "start_time", + "start_time_millis", + "stop_time", + "stop_time_millis", + "target_host", + "target_node", + "time", + "translog_ops", + "translog_ops_percent", + "translog_ops_recovered", + "type", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "bytes", + "bytes_percent", + "bytes_recovered", + "bytes_total", + "files", + "files_percent", + "files_recovered", + "files_total", + "index", + "repository", + "shard", + "snapshot", + "source_host", + "source_node", + "stage", + "start_time", + "start_time_millis", + "stop_time", + "stop_time_millis", + "target_host", + "target_node", + "time", + "translog_ops", + "translog_ops_percent", + "translog_ops_recovered", + "type", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, @@ -2053,13 +2314,14 @@ async def recovery( shard recoveries. :param format: Specifies the format to return the columnar data in, can be set to `text`, `json`, `cbor`, `yaml`, or `smile`. - :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. - :param time: Unit used to display time values. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. + :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. """ __path_parts: t.Dict[str, str] @@ -2193,7 +2455,52 @@ async def segments( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "committed", + "compound", + "docs.count", + "docs.deleted", + "generation", + "id", + "index", + "ip", + "prirep", + "searchable", + "segment", + "shard", + "size", + "size.memory", + "version", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "committed", + "compound", + "docs.count", + "docs.deleted", + "generation", + "id", + "index", + "ip", + "prirep", + "searchable", + "segment", + "shard", + "size", + "size.memory", + "version", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, local: t.Optional[bool] = None, @@ -2219,7 +2526,8 @@ async def segments( :param bytes: The unit used to display byte values. :param format: Specifies the format to return the columnar data in, can be set to `text`, `json`, `cbor`, `yaml`, or `smile`. 
- :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. :param local: If `true`, the request computes the list of selected nodes from @@ -2227,9 +2535,9 @@ async def segments( from the cluster state of the master node. In both cases the coordinating node will send requests for further information to each selected node. :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. :param v: When set to `true` will enable verbose output. """ __path_parts: t.Dict[str, str] @@ -2285,7 +2593,162 @@ async def shards( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "completion.size", + "dataset.size", + "dense_vector.value_count", + "docs", + "dsparse_vector.value_count", + "fielddata.evictions", + "fielddata.memory_size", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "id", + "index", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "node", + "prirep", + "query_cache.evictions", + "query_cache.memory_size", + "recoverysource.type", + "refresh.time", + "refresh.total", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "seq_no.global_checkpoint", + "seq_no.local_checkpoint", + "seq_no.max", + "shard", + "state", + "store", + "suggest.current", + "suggest.time", + "suggest.total", + "sync_id", + "unassigned.at", + "unassigned.details", + "unassigned.for", + "unassigned.reason", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "completion.size", + "dataset.size", + "dense_vector.value_count", + "docs", + "dsparse_vector.value_count", + "fielddata.evictions", + "fielddata.memory_size", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "id", + "index", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + 
"indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "node", + "prirep", + "query_cache.evictions", + "query_cache.memory_size", + "recoverysource.type", + "refresh.time", + "refresh.total", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "seq_no.global_checkpoint", + "seq_no.local_checkpoint", + "seq_no.max", + "shard", + "state", + "store", + "suggest.current", + "suggest.time", + "suggest.total", + "sync_id", + "unassigned.at", + "unassigned.details", + "unassigned.for", + "unassigned.reason", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, @@ -2316,11 +2779,11 @@ async def shards( :param h: List of columns to appear in the response. Supports simple wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. - :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. - :param time: Unit used to display time values. + :param master_timeout: The period to wait for a connection to the master node. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. + :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. 
""" __path_parts: t.Dict[str, str] @@ -2373,7 +2836,124 @@ async def snapshots( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "build", + "completion.size", + "cpu", + "disk.avail", + "disk.total", + "disk.used", + "disk.used_percent", + "fielddata.evictions", + "fielddata.memory_size", + "file_desc.current", + "file_desc.max", + "file_desc.percent", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "heap.current", + "heap.max", + "heap.percent", + "http_address", + "id", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "jdk", + "load_15m", + "load_1m", + "load_5m", + "mappings.total_count", + "mappings.total_estimated_overhead_in_bytes", + "master", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "name", + "node.role", + "pid", + "port", + "query_cache.evictions", + "query_cache.hit_count", + "query_cache.memory_size", + "query_cache.miss_count", + "ram.current", + "ram.max", + "ram.percent", + "refresh.time", + "refresh.total", + "request_cache.evictions", + "request_cache.hit_count", + "request_cache.memory_size", + "request_cache.miss_count", + "script.cache_evictions", + "script.compilations", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "shard_stats.total_count", + "suggest.current", + "suggest.time", + "suggest.total", + "uptime", + "version", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "duration", + "end_epoch", + "end_time", + "failed_shards", + "id", + "indices", + "reason", + "repository", + "start_epoch", + "start_time", + "status", + "successful_shards", + "total_shards", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, @@ -2401,7 +2981,8 @@ async def snapshots( If any repository fails during the request, Elasticsearch returns an error. :param format: Specifies the format to return the columnar data in, can be set to `text`, `json`, `cbor`, `yaml`, or `smile`. - :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. 
:param ignore_unavailable: If `true`, the response does not include information @@ -2648,7 +3229,62 @@ async def thread_pool( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "active", + "completed", + "core", + "ephemeral_id", + "host", + "ip", + "keep_alive", + "largest", + "max", + "name", + "node_id", + "node_name", + "pid", + "pool_size", + "port", + "queue", + "queue_size", + "rejected", + "size", + "type", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "active", + "completed", + "core", + "ephemeral_id", + "host", + "ip", + "keep_alive", + "largest", + "max", + "name", + "node_id", + "node_name", + "pid", + "pool_size", + "port", + "queue", + "queue_size", + "rejected", + "size", + "type", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, local: t.Optional[bool] = None, @@ -2682,10 +3318,10 @@ async def thread_pool( the local cluster state. If `false` the list of selected nodes are computed from the cluster state of the master node. In both cases the coordinating node will send requests for further information to each selected node. - :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. + :param master_timeout: The period to wait for a connection to the master node. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. """ diff --git a/elasticsearch/_async/client/cluster.py b/elasticsearch/_async/client/cluster.py index 074bdc0e8..9a2d4004c 100644 --- a/elasticsearch/_async/client/cluster.py +++ b/elasticsearch/_async/client/cluster.py @@ -51,7 +51,8 @@ async def allocation_explain( Get explanations for shard allocations in the cluster. For unassigned shards, it provides an explanation for why the shard is unassigned. For assigned shards, it provides an explanation for why the shard is remaining on its current node and has not moved or rebalanced to another node. - This API can be very useful when attempting to diagnose why a shard is unassigned or why a shard continues to remain on its current node when you might expect otherwise.

    + This API can be very useful when attempting to diagnose why a shard is unassigned or why a shard continues to remain on its current node when you might expect otherwise. + Refer to the linked documentation for examples of how to troubleshoot allocation issues using this API.
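A minimal sketch of asking for an allocation explanation for one specific shard; the index name and shard number are hypothetical (with no body, the API picks the first unassigned shard it finds):

    from elasticsearch import AsyncElasticsearch

    async def explain_allocation(client: AsyncElasticsearch) -> dict:
        # Explain why primary shard 0 of my-index is unassigned or not moving.
        return await client.cluster.allocation_explain(
            index="my-index",
            shard=0,
            primary=True,
            include_yes_decisions=False,
        )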

    ``_ @@ -361,8 +362,8 @@ async def get_settings( """ .. raw:: html -

    Get cluster-wide settings. - By default, it returns only settings that have been explicitly defined.

    +

    Get cluster-wide settings.

    +

    By default, it returns only settings that have been explicitly defined.
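A minimal sketch of reading the cluster settings and then updating one persistently; the setting name is only an illustration:

    from elasticsearch import AsyncElasticsearch

    async def tune_recovery(client: AsyncElasticsearch) -> None:
        # Only explicitly defined settings are returned unless defaults are requested.
        current = await client.cluster.get_settings(include_defaults=False, flat_settings=True)
        print(current)
        # Persistent settings survive a full cluster restart; transient ones do not.
        await client.cluster.put_settings(
            persistent={"indices.recovery.max_bytes_per_sec": "50mb"}
        )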

    ``_ @@ -870,9 +871,9 @@ async def put_settings( :param flat_settings: Return settings in flat format (default: false) :param master_timeout: Explicit operation timeout for connection to master node - :param persistent: + :param persistent: The settings that persist after the cluster restarts. :param timeout: Explicit operation timeout - :param transient: + :param transient: The settings that do not persist after the cluster restarts. """ __path_parts: t.Dict[str, str] = {} __path = "/_cluster/settings" diff --git a/elasticsearch/_async/client/esql.py b/elasticsearch/_async/client/esql.py index 85eec185d..e35b0c5ef 100644 --- a/elasticsearch/_async/client/esql.py +++ b/elasticsearch/_async/client/esql.py @@ -31,6 +31,8 @@ class EsqlClient(NamespacedClient): "columnar", "filter", "include_ccs_metadata", + "keep_alive", + "keep_on_completion", "locale", "params", "profile", @@ -88,7 +90,9 @@ async def async_query( parameter, runs it, and returns the results. :param allow_partial_results: If `true`, partial results will be returned if there are shard failures, but the query can continue to execute on other - clusters and shards. + clusters and shards. If `false`, the query will fail if there are any failures. + To override the default behavior, you can set the `esql.query.allow_partial_results` + cluster setting to `false`. :param columnar: By default, ES|QL returns results as rows. For example, FROM returns each individual document as one row. For the JSON, YAML, CBOR and smile formats, ES|QL can return the results in a columnar fashion where one @@ -151,10 +155,6 @@ async def async_query( __query["format"] = format if human is not None: __query["human"] = human - if keep_alive is not None: - __query["keep_alive"] = keep_alive - if keep_on_completion is not None: - __query["keep_on_completion"] = keep_on_completion if pretty is not None: __query["pretty"] = pretty if not __body: @@ -166,6 +166,10 @@ async def async_query( __body["filter"] = filter if include_ccs_metadata is not None: __body["include_ccs_metadata"] = include_ccs_metadata + if keep_alive is not None: + __body["keep_alive"] = keep_alive + if keep_on_completion is not None: + __body["keep_on_completion"] = keep_on_completion if locale is not None: __body["locale"] = locale if params is not None: @@ -248,6 +252,14 @@ async def async_query_get( drop_null_columns: t.Optional[bool] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + format: t.Optional[ + t.Union[ + str, + t.Literal[ + "arrow", "cbor", "csv", "json", "smile", "tsv", "txt", "yaml" + ], + ] + ] = None, human: t.Optional[bool] = None, keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, pretty: t.Optional[bool] = None, @@ -273,6 +285,7 @@ async def async_query_get( will be removed from the `columns` and `values` portion of the results. If `true`, the response will include an extra section under the name `all_columns` which has the name of all the columns. + :param format: A short version of the Accept header, for example `json` or `yaml`. :param keep_alive: The period for which the query and its results are stored in the cluster. When this period expires, the query and its results are deleted, even if the query is still ongoing. 
@@ -293,6 +306,8 @@ async def async_query_get( __query["error_trace"] = error_trace if filter_path is not None: __query["filter_path"] = filter_path + if format is not None: + __query["format"] = format if human is not None: __query["human"] = human if keep_alive is not None: @@ -366,6 +381,87 @@ async def async_query_stop( path_parts=__path_parts, ) + @_rewrite_parameters() + @_stability_warning(Stability.EXPERIMENTAL) + async def get_query( + self, + *, + id: str, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Get information about a specific running ES|QL query. + Returns an object with extended information about a running ES|QL query.

    + + + :param id: The query ID + """ + if id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'id'") + __path_parts: t.Dict[str, str] = {"id": _quote(id)} + __path = f'/_query/queries/{__path_parts["id"]}' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="esql.get_query", + path_parts=__path_parts, + ) + + @_rewrite_parameters() + @_stability_warning(Stability.EXPERIMENTAL) + async def list_queries( + self, + *, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Get information about running ES|QL queries. + Returns an object containing IDs and other information about the running ES|QL queries.
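A minimal sketch of calling the two experimental endpoints added in this change from the async client; the query ID is a placeholder and would normally come from the list response:

    from elasticsearch import AsyncElasticsearch

    async def inspect_esql_queries(client: AsyncElasticsearch) -> None:
        # List the currently running ES|QL queries.
        running = await client.esql.list_queries()
        print(running)
        # Fetch extended information about one of them by ID.
        details = await client.esql.get_query(id="query-id-from-list-response")
        print(details)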

    + + """ + __path_parts: t.Dict[str, str] = {} + __path = "/_query/queries" + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="esql.list_queries", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=( "query", @@ -422,7 +518,9 @@ async def query( parameter, runs it, and returns the results. :param allow_partial_results: If `true`, partial results will be returned if there are shard failures, but the query can continue to execute on other - clusters and shards. + clusters and shards. If `false`, the query will fail if there are any failures. + To override the default behavior, you can set the `esql.query.allow_partial_results` + cluster setting to `false`. :param columnar: By default, ES|QL returns results as rows. For example, FROM returns each individual document as one row. For the JSON, YAML, CBOR and smile formats, ES|QL can return the results in a columnar fashion where one diff --git a/elasticsearch/_async/client/indices.py b/elasticsearch/_async/client/indices.py index f2535776f..adcde9942 100644 --- a/elasticsearch/_async/client/indices.py +++ b/elasticsearch/_async/client/indices.py @@ -338,7 +338,7 @@ async def clear_cache( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param fielddata: If `true`, clears the fields cache. Use the `fields` parameter to clear the cache of specific fields only. :param fields: Comma-separated list of field names used to limit the `fielddata` @@ -563,7 +563,7 @@ async def close( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -656,7 +656,15 @@ async def create( ``_ - :param index: Name of the index you wish to create. + :param index: Name of the index you wish to create. Index names must meet the + following criteria: * Lowercase only * Cannot include `\\`, `/`, `*`, `?`, + `"`, `<`, `>`, `|`, ` ` (space character), `,`, or `#` * Indices prior to + 7.0 could contain a colon (`:`), but that has been deprecated and will not + be supported in later versions * Cannot start with `-`, `_`, or `+` * Cannot + be `.` or `..` * Cannot be longer than 255 bytes (note thtat it is bytes, + so multi-byte characters will reach the limit faster) * Names starting with + `.` are deprecated, except for hidden indices and internal indices managed + by plugins :param aliases: Aliases for the index. :param mappings: Mapping for fields in the index. 
If specified, this mapping can include: - Field names - Field data types - Mapping parameters @@ -942,7 +950,7 @@ async def delete( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -1173,6 +1181,71 @@ async def delete_data_stream( path_parts=__path_parts, ) + @_rewrite_parameters() + async def delete_data_stream_options( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Delete data stream options. + Removes the data stream options from a data stream.

    + + + ``_ + + :param name: A comma-separated list of data streams of which the data stream + options will be deleted; use `*` to get all data streams + :param expand_wildcards: Whether wildcard expressions should get expanded to + open or closed indices (default: open) + :param master_timeout: Specify timeout for connection to master + :param timeout: Explicit timestamp for the document + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_options' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + __headers = {"accept": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "DELETE", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.delete_data_stream_options", + path_parts=__path_parts, + ) + @_rewrite_parameters() async def delete_index_template( self, @@ -1246,7 +1319,8 @@ async def delete_template( """ .. raw:: html -

    Delete a legacy index template.

    +

    Delete a legacy index template. + IMPORTANT: This documentation is about legacy index templates, which are deprecated and will be replaced by the composable templates introduced in Elasticsearch 7.8.

    ``_ @@ -1486,7 +1560,7 @@ async def exists( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param flat_settings: If `true`, returns settings in flat format. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. @@ -1570,7 +1644,7 @@ async def exists_alias( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, requests that include a missing data stream or index in the target indices or data streams return an error. :param master_timeout: Period to wait for a connection to the master node. If @@ -1919,7 +1993,7 @@ async def flush( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param force: If `true`, the request forces a flush even if there are no changes to commit to the index. :param ignore_unavailable: If `false`, the request returns an error if it targets @@ -2237,7 +2311,7 @@ async def get_alias( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -2317,8 +2391,7 @@ async def get_data_lifecycle( wildcards (`*`). To target all data streams, omit this parameter or use `*` or `_all`. :param expand_wildcards: Type of data stream that wildcard patterns can match. - Supports comma-separated values, such as `open,hidden`. Valid values are: - `all`, `open`, `closed`, `hidden`, `none`. + Supports comma-separated values, such as `open,hidden`. :param include_defaults: If `true`, return all default settings in the response. :param master_timeout: Period to wait for a connection to the master node. 
If no response is received before the timeout expires, the request fails and @@ -2469,6 +2542,121 @@ async def get_data_stream( path_parts=__path_parts, ) + @_rewrite_parameters() + async def get_data_stream_options( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Get data stream options.

    +

    Get the data stream options configuration of one or more data streams.

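    A minimal, runnable sketch of calling the new method (the cluster URL and data stream name below are placeholders, not part of this change):

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            # Placeholder endpoint; point this at your own cluster and add auth as needed.
            client = AsyncElasticsearch("http://localhost:9200")
            resp = await client.indices.get_data_stream_options(name="my-data-stream")
            print(resp)
            await client.close()

        asyncio.run(main())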
    + + + ``_ + + :param name: Comma-separated list of data streams to limit the request. Supports + wildcards (`*`). To target all data streams, omit this parameter or use `*` + or `_all`. + :param expand_wildcards: Type of data stream that wildcard patterns can match. + Supports comma-separated values, such as `open,hidden`. + :param master_timeout: Period to wait for a connection to the master node. If + no response is received before the timeout expires, the request fails and + returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_options' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.get_data_stream_options", + path_parts=__path_parts, + ) + + @_rewrite_parameters() + async def get_data_stream_settings( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Get data stream settings.

    +

    Get setting information for one or more data streams.

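    Likewise, a minimal sketch for reading data stream settings (names and URL are placeholders):

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.indices.get_data_stream_settings(name="my-data-stream")
            print(resp)
            await client.close()

        asyncio.run(main())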
    + + + ``_ + + :param name: A comma-separated list of data streams or data stream patterns. + Supports wildcards (`*`). + :param master_timeout: The period to wait for a connection to the master node. + If no response is received before the timeout expires, the request fails + and returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_settings' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.get_data_stream_settings", + path_parts=__path_parts, + ) + @_rewrite_parameters() async def get_field_mapping( self, @@ -2513,7 +2701,7 @@ async def get_field_mapping( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param include_defaults: If `true`, return all default settings in the response. @@ -2665,7 +2853,7 @@ async def get_mapping( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param local: If `true`, the request retrieves information from the local node @@ -2875,7 +3063,7 @@ async def get_template( """ .. raw:: html -

    Get index templates. +

    Get legacy index templates. Get information about one or more index templates.

    IMPORTANT: This documentation is about legacy index templates, which are deprecated and will be replaced by the composable templates introduced in Elasticsearch 7.8.

    @@ -3157,7 +3345,7 @@ async def open( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -3416,8 +3604,7 @@ async def put_data_lifecycle( for this data stream. A data stream lifecycle that's disabled (enabled: `false`) will have no effect on the data stream. :param expand_wildcards: Type of data stream that wildcard patterns can match. - Supports comma-separated values, such as `open,hidden`. Valid values are: - `all`, `hidden`, `open`, `closed`, `none`. + Supports comma-separated values, such as `open,hidden`. :param master_timeout: Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error. @@ -3466,6 +3653,167 @@ async def put_data_lifecycle( path_parts=__path_parts, ) + @_rewrite_parameters( + body_fields=("failure_store",), + ) + async def put_data_stream_options( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + failure_store: t.Optional[t.Mapping[str, t.Any]] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Update data stream options. + Update the data stream options of the specified data streams.

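    A hedged usage sketch; the failure_store body shown here (enabling the failure store) is an assumption about the expected option shape and is not defined by this diff:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.indices.put_data_stream_options(
                name="my-data-stream",
                failure_store={"enabled": True},  # assumed option shape; adjust as required
            )
            print(resp)
            await client.close()

        asyncio.run(main())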
    + + + ``_ + + :param name: Comma-separated list of data streams used to limit the request. + Supports wildcards (`*`). To target all data streams use `*` or `_all`. + :param expand_wildcards: Type of data stream that wildcard patterns can match. + Supports comma-separated values, such as `open,hidden`. + :param failure_store: If defined, it will update the failure store configuration + of every data stream resolved by the name expression. + :param master_timeout: Period to wait for a connection to the master node. If + no response is received before the timeout expires, the request fails and + returns an error. + :param timeout: Period to wait for a response. If no response is received before + the timeout expires, the request fails and returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_options' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + if not __body: + if failure_store is not None: + __body["failure_store"] = failure_store + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="indices.put_data_stream_options", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_name="settings", + ) + async def put_data_stream_settings( + self, + *, + name: t.Union[str, t.Sequence[str]], + settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Mapping[str, t.Any]] = None, + dry_run: t.Optional[bool] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Update data stream settings.

    +

    This API can be used to override settings on specific data streams. These overrides will take precedence over what + is specified in the template that the data stream matches. To prevent your data stream from getting into an invalid state, + only certain settings are allowed. If possible, the setting change is applied to all + backing indices. Otherwise, it will be applied when the data stream is next rolled over.

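    A sketch of overriding one setting with a dry run first; the data stream name is a placeholder and the setting key mirrors the index settings examples used elsewhere in this file:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.indices.put_data_stream_settings(
                name="my-data-stream",
                settings={"index.number_of_replicas": 1},
                dry_run=True,  # simulate the change and report what would happen
            )
            print(resp)
            await client.close()

        asyncio.run(main())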
    + + + ``_ + + :param name: A comma-separated list of data streams or data stream patterns. + :param settings: + :param dry_run: If `true`, the request does not actually change the settings + on any data streams or indices. Instead, it simulates changing the settings + and reports back to the user what would have happened had these settings + actually been applied. + :param master_timeout: The period to wait for a connection to the master node. + If no response is received before the timeout expires, the request fails + and returns an error. + :param timeout: The period to wait for a response. If no response is received + before the timeout expires, the request fails and returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + if settings is None and body is None: + raise ValueError( + "Empty value passed for parameters 'settings' and 'body', one of them should be set." + ) + elif settings is not None and body is not None: + raise ValueError("Cannot set both 'settings' and 'body'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_settings' + __query: t.Dict[str, t.Any] = {} + if dry_run is not None: + __query["dry_run"] = dry_run + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + __body = settings if settings is not None else body + __headers = {"accept": "application/json", "content-type": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="indices.put_data_stream_settings", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=( "allow_auto_create", @@ -3693,24 +4041,17 @@ async def put_mapping(

    Update field mappings. Add new fields to an existing data stream or index. - You can also use this API to change the search settings of existing fields and add new properties to existing object fields. - For data streams, these changes are applied to all backing indices by default.

    -

    Add multi-fields to an existing field

    -

    Multi-fields let you index the same field in different ways. - You can use this API to update the fields mapping parameter and enable multi-fields for an existing field. - WARNING: If an index (or data stream) contains documents when you add a multi-field, those documents will not have values for the new multi-field. - You can populate the new multi-field with the update by query API.

    -

    Change supported mapping parameters for an existing field

    -

    The documentation for each mapping parameter indicates whether you can update it for an existing field using this API. - For example, you can use the update mapping API to update the ignore_above parameter.

    -

    Change the mapping of an existing field

    -

    Except for supported mapping parameters, you can't change the mapping or field type of an existing field. - Changing an existing field could invalidate data that's already indexed.

    -

    If you need to change the mapping of a field in a data stream's backing indices, refer to documentation about modifying data streams. - If you need to change the mapping of a field in other indices, create a new index with the correct mapping and reindex your data into that index.

    -

    Rename a field

    -

    Renaming a field would invalidate data already indexed under the old field name. - Instead, add an alias field to create an alternate field name.

    + You can use the update mapping API to:

    +
      +
    • Add a new field to an existing index
    • +
    • Update mappings for multiple indices in a single request
    • +
    • Add new properties to an object field
    • +
    • Enable multi-fields for an existing field
    • +
    • Update supported mapping parameters
    • +
    • Change a field's mapping using reindexing
    • +
    • Rename a field using a field alias
    • +
    +

    Learn how to use the update mapping API with practical examples in the Update mapping API examples guide.

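    As a quick illustration of the first bullet above (adding a new field), a minimal sketch that assumes the method's standard properties body field; the index and field names are placeholders:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.indices.put_mapping(
                index="my-index",
                properties={"tags": {"type": "keyword"}},  # add a new leaf field
            )
            print(resp)
            await client.close()

        asyncio.run(main())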
    ``_ @@ -3729,7 +4070,7 @@ async def put_mapping( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param field_names: Control whether field names are enabled for the index. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. @@ -3847,8 +4188,36 @@ async def put_settings( Changes dynamic index settings in real time. For data streams, index setting changes are applied to all backing indices by default.

    To revert a setting to the default value, use a null value. - The list of per-index settings that can be updated dynamically on live indices can be found in index module documentation. + The list of per-index settings that can be updated dynamically on live indices can be found in index settings documentation. To preserve existing settings from being updated, set the preserve_existing parameter to true.

    +

    For performance optimization during bulk indexing, you can disable the refresh interval. + Refer to disable refresh interval for an example. + There are multiple valid ways to represent index settings in the request body. You can specify only the setting, for example:

    +
    {
    +            "number_of_replicas": 1
    +          }
    +          
    +

    Or you can use an index setting object:

    +
    {
    +            "index": {
    +              "number_of_replicas": 1
    +            }
    +          }
    +          
    +

    Or you can use dot notation:

    +
    {
    +            "index.number_of_replicas": 1
    +          }
    +          
    +

    Or you can embed any of the aforementioned options in a settings object. For example:

    +
    {
    +            "settings": {
    +              "index": {
    +                "number_of_replicas": 1
    +              }
    +            }
    +          }
    +          

    NOTE: You can only define new analyzers on closed indices. To add an analyzer, you must close the index, define the analyzer, and reopen the index. You cannot close the write index of a data stream. @@ -3856,7 +4225,8 @@ async def put_settings( Then roll over the data stream to apply the new analyzer to the stream's write index and future backing indices. This affects searches and any new data added to the stream after the rollover. However, it does not affect the data stream's backing indices or their existing data. - To change the analyzer for existing backing indices, you must create a new data stream and reindex your data into it.

    + To change the analyzer for existing backing indices, you must create a new data stream and reindex your data into it. + Refer to updating analyzers on existing indices for step-by-step examples.

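    A sketch of the equivalent call through this client using the settings body parameter; any of the body representations shown above should work, and the index name is a placeholder:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.indices.put_settings(
                index="my-index",
                settings={"index": {"number_of_replicas": 1}},
            )
            print(resp)
            await client.close()

        asyncio.run(main())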
    ``_ @@ -3968,7 +4338,7 @@ async def put_template( """ .. raw:: html -

    Create or update an index template. +

    Create or update a legacy index template. Index templates define settings, mappings, and aliases that can be applied automatically to new indices. Elasticsearch applies templates to new indices based on an index pattern that matches the index name.

    IMPORTANT: This documentation is about legacy index templates, which are deprecated and will be replaced by the composable templates introduced in Elasticsearch 7.8.

    @@ -4172,7 +4542,7 @@ async def refresh( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. """ @@ -4288,6 +4658,105 @@ async def reload_search_analyzers( path_parts=__path_parts, ) + @_rewrite_parameters() + async def remove_block( + self, + *, + index: str, + block: t.Union[str, t.Literal["metadata", "read", "read_only", "write"]], + allow_no_indices: t.Optional[bool] = None, + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + ignore_unavailable: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Remove an index block.

    +

    Remove an index block from an index. + Index blocks limit the operations allowed on an index by blocking specific operation types.

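    A minimal sketch of removing a write block from an index (index name and URL are placeholders):

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.indices.remove_block(index="my-index", block="write")
            print(resp)
            await client.close()

        asyncio.run(main())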
    + + + ``_ + + :param index: A comma-separated list or wildcard expression of index names used + to limit the request. By default, you must explicitly name the indices you + are removing blocks from. To allow the removal of blocks from indices with + `_all`, `*`, or other wildcard expressions, change the `action.destructive_requires_name` + setting to `false`. You can update this setting in the `elasticsearch.yml` + file or by using the cluster update settings API. + :param block: The block type to remove from the index. + :param allow_no_indices: If `false`, the request returns an error if any wildcard + expression, index alias, or `_all` value targets only missing or closed indices. + This behavior applies even if the request targets other open indices. For + example, a request targeting `foo*,bar*` returns an error if an index starts + with `foo` but no index starts with `bar`. + :param expand_wildcards: The type of index that wildcard patterns can match. + If the request can target data streams, this argument determines whether + wildcard expressions match hidden data streams. It supports comma-separated + values, such as `open,hidden`. + :param ignore_unavailable: If `false`, the request returns an error if it targets + a missing or closed index. + :param master_timeout: The period to wait for the master node. If the master + node is not available before the timeout expires, the request fails and returns + an error. It can also be set to `-1` to indicate that the request should + never timeout. + :param timeout: The period to wait for a response from all relevant nodes in + the cluster after updating the cluster metadata. If no response is received + before the timeout expires, the cluster metadata update still applies but + the response will indicate that it was not completely acknowledged. It can + also be set to `-1` to indicate that the request should never timeout. + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'index'") + if block in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'block'") + __path_parts: t.Dict[str, str] = { + "index": _quote(index), + "block": _quote(block), + } + __path = f'/{__path_parts["index"]}/_block/{__path_parts["block"]}' + __query: t.Dict[str, t.Any] = {} + if allow_no_indices is not None: + __query["allow_no_indices"] = allow_no_indices + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if ignore_unavailable is not None: + __query["ignore_unavailable"] = ignore_unavailable + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + __headers = {"accept": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "DELETE", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.remove_block", + path_parts=__path_parts, + ) + @_rewrite_parameters() async def resolve_cluster( self, @@ -4371,10 +4840,9 @@ async def resolve_cluster( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. 
Valid values are: `all`, `open`, `closed`, `hidden`, `none`. - NOTE: This option is only supported when specifying an index expression. - You will get an error if you specify index options to the `_resolve/cluster` - API endpoint that takes no index expression. + as `open,hidden`. NOTE: This option is only supported when specifying an + index expression. You will get an error if you specify index options to the + `_resolve/cluster` API endpoint that takes no index expression. :param ignore_throttled: If true, concrete, expanded, or aliased indices are ignored when frozen. NOTE: This option is only supported when specifying an index expression. You will get an error if you specify index options to @@ -4467,7 +4935,7 @@ async def resolve_index( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. """ @@ -4681,7 +5149,7 @@ async def segments( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. """ @@ -5505,7 +5973,7 @@ async def validate_query( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param explain: If `true`, the response returns detailed information if an error has occurred. :param ignore_unavailable: If `false`, the request returns an error if it targets diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py index 1e8c60aaa..0d98df514 100644 --- a/elasticsearch/_async/client/inference.py +++ b/elasticsearch/_async/client/inference.py @@ -370,22 +370,38 @@ async def put( """ .. raw:: html -

    Create an inference endpoint. - When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    +

    Create an inference endpoint.

    IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.

    +

    The following integrations are available through the inference API. You can find the available task types next to the integration name:

    +
      +
    • AlibabaCloud AI Search (completion, rerank, sparse_embedding, text_embedding)
    • +
    • Amazon Bedrock (completion, text_embedding)
    • +
    • Anthropic (completion)
    • +
    • Azure AI Studio (completion, text_embedding)
    • +
    • Azure OpenAI (completion, text_embedding)
    • +
    • Cohere (completion, rerank, text_embedding)
    • +
    • DeepSeek (completion, chat_completion)
    • +
    • Elasticsearch (rerank, sparse_embedding, text_embedding - this service is for built-in models and models uploaded through Eland)
    • +
    • ELSER (sparse_embedding)
    • +
    • Google AI Studio (completion, text_embedding)
    • +
    • Google Vertex AI (rerank, text_embedding)
    • +
    • Hugging Face (chat_completion, completion, rerank, text_embedding)
    • +
    • Mistral (chat_completion, completion, text_embedding)
    • +
    • OpenAI (chat_completion, completion, text_embedding)
    • +
    • VoyageAI (text_embedding, rerank)
    • +
    • Watsonx inference integration (text_embedding)
    • +
    • JinaAI (text_embedding, rerank)
    • +
    ``_ :param inference_id: The inference Id :param inference_config: - :param task_type: The task type + :param task_type: The task type. Refer to the integration list in the API description + for the available task types. """ if inference_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'inference_id'") @@ -458,11 +474,6 @@ async def put_alibabacloud(

    Create an AlibabaCloud AI Search inference endpoint.

    Create an inference endpoint to perform an inference task with the alibabacloud-ai-search service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -553,16 +564,11 @@ async def put_amazonbedrock( .. raw:: html

    Create an Amazon Bedrock inference endpoint.

    -

    Creates an inference endpoint to perform an inference task with the amazonbedrock service.

    +

    Create an inference endpoint to perform an inference task with the amazonbedrock service.

    info You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -654,11 +660,6 @@ async def put_anthropic(

    Create an Anthropic inference endpoint.

    Create an inference endpoint to perform an inference task with the anthropic service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -751,11 +752,6 @@ async def put_azureaistudio(

    Create an Azure AI studio inference endpoint.

    Create an inference endpoint to perform an inference task with the azureaistudio service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -853,11 +849,6 @@ async def put_azureopenai(
  • GPT-3.5
  • The list of embeddings models that you can choose from in your deployment can be found in the Azure models documentation.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -951,11 +942,6 @@ async def put_cohere(

    Create a Cohere inference endpoint.

    Create an inference endpoint to perform an inference task with the cohere service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1017,6 +1003,85 @@ async def put_cohere( path_parts=__path_parts, ) + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_deepseek( + self, + *, + task_type: t.Union[str, t.Literal["chat_completion", "completion"]], + deepseek_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Create a DeepSeek inference endpoint.

    +

    Create an inference endpoint to perform an inference task with the deepseek service.

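    A hedged sketch of creating a chat_completion endpoint with the new method; the API key is a placeholder and the service_settings keys (api_key, model_id) are assumptions, so check the DeepSeek service documentation for the exact settings:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.inference.put_deepseek(
                task_type="chat_completion",
                deepseek_inference_id="my-deepseek-endpoint",
                service="deepseek",
                service_settings={
                    "api_key": "<your-api-key>",  # placeholder
                    "model_id": "deepseek-chat",  # assumed setting key and model name
                },
            )
            print(resp)
            await client.close()

        asyncio.run(main())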
    + + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param deepseek_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `deepseek`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `deepseek` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if deepseek_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'deepseek_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "deepseek_inference_id": _quote(deepseek_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_deepseek", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=( "service", @@ -1239,11 +1304,6 @@ async def put_googleaistudio(

    Create a Google AI Studio inference endpoint.

    Create an inference endpoint to perform an inference task with the googleaistudio service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1314,7 +1374,9 @@ async def put_googleaistudio( async def put_googlevertexai( self, *, - task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"] + ], googlevertexai_inference_id: str, service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1331,11 +1393,6 @@ async def put_googlevertexai(

    Create a Google Vertex AI inference endpoint.

    Create an inference endpoint to perform an inference task with the googlevertexai service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1400,12 +1457,19 @@ async def put_googlevertexai( ) @_rewrite_parameters( - body_fields=("service", "service_settings", "chunking_settings"), + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), ) async def put_hugging_face( self, *, - task_type: t.Union[str, t.Literal["text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"] + ], huggingface_inference_id: str, service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1414,17 +1478,21 @@ async def put_hugging_face( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html

    Create a Hugging Face inference endpoint.

    -

    Create an inference endpoint to perform an inference task with the hugging_face service.

    -

    You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. - Select the model you want to use on the new endpoint creation page (for example intfloat/e5-small-v2), then select the sentence embeddings task under the advanced configuration section. - Create the endpoint and copy the URL after the endpoint initialization has been finished.

    -

    The following models are recommended for the Hugging Face service:

    +

    Create an inference endpoint to perform an inference task with the hugging_face service. + Supported tasks include: text_embedding, rerank, completion, and chat_completion.

    +

    To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint. + Select a model that supports the task you intend to use.

    +

    For Elastic's text_embedding task: + The selected model must support the Sentence Embeddings task. On the new endpoint creation page, select the Sentence Embeddings task under the Advanced Configuration section. + After the endpoint has been initialized, copy the generated endpoint URL. + Recommended models for the text_embedding task:

    • all-MiniLM-L6-v2
    • all-MiniLM-L12-v2
    • @@ -1434,11 +1502,24 @@ async def put_hugging_face(
    • multilingual-e5-base
    • multilingual-e5-small
    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    +

    For Elastic's chat_completion and completion tasks: + The selected model must support the Text Generation task and expose the OpenAI API. HuggingFace supports both serverless and dedicated endpoints for Text Generation. When creating a dedicated endpoint, select the Text Generation task. + After the endpoint is initialized (for dedicated) or ready (for serverless), ensure that it supports the OpenAI API and that its URL includes the /v1/chat/completions path. Then copy the full endpoint URL for use. + Recommended models for the chat_completion and completion tasks:

    +
      +
    • Mistral-7B-Instruct-v0.2
    • +
    • QwQ-32B
    • +
    • Phi-3-mini-128k-instruct
    • +
    +

    For Elastic's rerank task: + The selected model must support the sentence-ranking task and expose the OpenAI API. + HuggingFace currently supports only dedicated (not serverless) endpoints for Rerank. + After the endpoint is initialized, copy the full endpoint URL for use. + Tested models for the rerank task:

    +
      +
    • bge-reranker-base
    • +
    • jina-reranker-v1-turbo-en-GGUF
    • +
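    A hedged sketch of creating a text_embedding endpoint; the token and endpoint URL are placeholders, and the service_settings keys (url, api_key) are assumptions based on the description above:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.inference.put_hugging_face(
                task_type="text_embedding",
                huggingface_inference_id="my-hf-endpoint",
                service="hugging_face",
                service_settings={
                    "api_key": "<your-hf-token>",  # placeholder
                    "url": "<your-hf-endpoint-url>",  # URL copied from the Hugging Face endpoint page
                },
            )
            print(resp)
            await client.close()

        asyncio.run(main())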
    ``_ @@ -1450,6 +1531,8 @@ async def put_hugging_face( :param service_settings: Settings used to install the inference model. These settings are specific to the `hugging_face` service. :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. """ if task_type in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'task_type'") @@ -1483,6 +1566,8 @@ async def put_hugging_face( __body["service_settings"] = service_settings if chunking_settings is not None: __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings if not __body: __body = None # type: ignore[assignment] __headers = {"accept": "application/json"} @@ -1528,11 +1613,6 @@ async def put_jinaai(

    Create an inference endpoint to perform an inference task with the jinaai service.

    To review the available rerank models, refer to https://jina.ai/reranker. To review the available text_embedding models, refer to the https://jina.ai/embeddings/.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1600,7 +1680,9 @@ async def put_jinaai( async def put_mistral( self, *, - task_type: t.Union[str, t.Literal["text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "text_embedding"] + ], mistral_inference_id: str, service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1615,18 +1697,12 @@ async def put_mistral( .. raw:: html

    Create a Mistral inference endpoint.

    -

    Creates an inference endpoint to perform an inference task with the mistral service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    +

    Create an inference endpoint to perform an inference task with the mistral service.

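    A hedged sketch for the mistral service; the API key is a placeholder and the service_settings keys (api_key, model) are assumptions, so verify them against the Mistral service documentation:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.inference.put_mistral(
                task_type="text_embedding",
                mistral_inference_id="my-mistral-endpoint",
                service="mistral",
                service_settings={
                    "api_key": "<your-api-key>",  # placeholder
                    "model": "mistral-embed",  # assumed setting key and model name
                },
            )
            print(resp)
            await client.close()

        asyncio.run(main())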
    ``_ - :param task_type: The task type. The only valid task type for the model to perform - is `text_embedding`. + :param task_type: The type of the inference task that the model will perform. :param mistral_inference_id: The unique identifier of the inference endpoint. :param service: The type of service supported for the specified task type. In this case, `mistral`. @@ -1709,11 +1785,6 @@ async def put_openai(

    Create an OpenAI inference endpoint.

    Create an inference endpoint to perform an inference task with the openai service or openai compatible APIs.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1873,7 +1944,9 @@ async def put_voyageai( async def put_watsonx( self, *, - task_type: t.Union[str, t.Literal["text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "text_embedding"] + ], watsonx_inference_id: str, service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1890,17 +1963,11 @@ async def put_watsonx(

    Create an inference endpoint to perform an inference task with the watsonxai service. You need an IBM Cloud Databases for Elasticsearch deployment to use the watsonxai inference service. You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ - :param task_type: The task type. The only valid task type for the model to perform - is `text_embedding`. + :param task_type: The type of the inference task that the model will perform. :param watsonx_inference_id: The unique identifier of the inference endpoint. :param service: The type of service supported for the specified task type. In this case, `watsonxai`. diff --git a/elasticsearch/_async/client/ml.py b/elasticsearch/_async/client/ml.py index 49232cc52..fb5af673f 100644 --- a/elasticsearch/_async/client/ml.py +++ b/elasticsearch/_async/client/ml.py @@ -1676,7 +1676,7 @@ async def get_data_frame_analytics_stats( """ .. raw:: html -

    Get data frame analytics jobs usage info.

    +

    Get data frame analytics job stats.

    ``_ @@ -1744,7 +1744,7 @@ async def get_datafeed_stats( """ .. raw:: html -

    Get datafeeds usage info. +

    Get datafeed stats. You can get statistics for multiple datafeeds in a single API request by using a comma-separated list of datafeeds or a wildcard expression. You can get statistics for all datafeeds by using _all, by specifying * as the @@ -2033,7 +2033,7 @@ async def get_job_stats( """ .. raw:: html -

    Get anomaly detection jobs usage info.

    +

    Get anomaly detection job stats.

    ``_ @@ -3871,13 +3871,7 @@ async def put_job( :param description: A description of the job. :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard - expressions match hidden data streams. Supports comma-separated values. Valid - values are: * `all`: Match any data stream or index, including hidden ones. - * `closed`: Match closed, non-hidden indices. Also matches any non-hidden - data stream. Data streams cannot be closed. * `hidden`: Match hidden data - streams and hidden indices. Must be combined with `open`, `closed`, or both. - * `none`: Wildcard patterns are not accepted. * `open`: Match open, non-hidden - indices. Also matches any non-hidden data stream. + expressions match hidden data streams. Supports comma-separated values. :param groups: A list of job groups. A job can belong to no groups or many. :param ignore_throttled: If `true`, concrete, expanded or aliased indices are ignored when frozen. @@ -5140,13 +5134,7 @@ async def update_datafeed( check runs only on real-time datafeeds. :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard - expressions match hidden data streams. Supports comma-separated values. Valid - values are: * `all`: Match any data stream or index, including hidden ones. - * `closed`: Match closed, non-hidden indices. Also matches any non-hidden - data stream. Data streams cannot be closed. * `hidden`: Match hidden data - streams and hidden indices. Must be combined with `open`, `closed`, or both. - * `none`: Wildcard patterns are not accepted. * `open`: Match open, non-hidden - indices. Also matches any non-hidden data stream. + expressions match hidden data streams. Supports comma-separated values. :param frequency: The interval at which scheduled queries are made while the datafeed runs in real time. The default value is either the bucket span for short bucket spans, or, for longer bucket spans, a sensible fraction of the diff --git a/elasticsearch/_async/client/rollup.py b/elasticsearch/_async/client/rollup.py index e0d581d77..ea1ace0dc 100644 --- a/elasticsearch/_async/client/rollup.py +++ b/elasticsearch/_async/client/rollup.py @@ -419,28 +419,7 @@ async def rollup_search( The following functionality is not available:

    size: Because rollups work on pre-aggregated data, no search hits can be returned and so size must be set to zero or omitted entirely. highlighter, suggestors, post_filter, profile, explain: These are similarly disallowed.

    -

    Searching both historical rollup and non-rollup data

    -

    The rollup search API has the capability to search across both "live" non-rollup data and the aggregated rollup data. - This is done by simply adding the live indices to the URI. For example:

    -
    GET sensor-1,sensor_rollup/_rollup_search
    -          {
    -            "size": 0,
    -            "aggregations": {
    -               "max_temperature": {
    -                "max": {
    -                  "field": "temperature"
    -                }
    -              }
    -            }
    -          }
    -          
    -

    The rollup search endpoint does two things when the search runs:

    -
      -
    • The original request is sent to the non-rollup index unaltered.
    • -
    • A rewritten version of the original request is sent to the rollup index.
    • -
    -

    When the two responses are received, the endpoint rewrites the rollup response and merges the two together. - During the merging process, if there is any overlap in buckets between the two responses, the buckets from the non-rollup index are used.

    +

    For more detailed examples of using the rollup search API, including querying rolled-up data only or combining rolled-up and live data, refer to the External documentation.

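    A minimal Python sketch of an aggregation-only rollup search; the index and field names are illustrative and mirror the sensor example referenced by the linked documentation:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.rollup.rollup_search(
                index="sensor_rollup",  # add live indices too, e.g. "sensor-1,sensor_rollup"
                size=0,  # rollup searches cannot return hits
                aggregations={"max_temperature": {"max": {"field": "temperature"}}},
            )
            print(resp)
            await client.close()

        asyncio.run(main())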
    ``_ diff --git a/elasticsearch/_async/client/security.py b/elasticsearch/_async/client/security.py index 840a3d249..14bf20ab8 100644 --- a/elasticsearch/_async/client/security.py +++ b/elasticsearch/_async/client/security.py @@ -2455,6 +2455,7 @@ async def has_privileges( "manage_data_frame_transforms", "manage_data_stream_global_retention", "manage_enrich", + "manage_esql", "manage_ilm", "manage_index_templates", "manage_inference", @@ -2480,6 +2481,7 @@ async def has_privileges( "monitor_data_frame_transforms", "monitor_data_stream_global_retention", "monitor_enrich", + "monitor_esql", "monitor_inference", "monitor_ml", "monitor_rollup", @@ -3126,6 +3128,7 @@ async def put_role( "manage_data_frame_transforms", "manage_data_stream_global_retention", "manage_enrich", + "manage_esql", "manage_ilm", "manage_index_templates", "manage_inference", @@ -3151,6 +3154,7 @@ async def put_role( "monitor_data_frame_transforms", "monitor_data_stream_global_retention", "monitor_enrich", + "monitor_esql", "monitor_inference", "monitor_ml", "monitor_rollup", @@ -3553,7 +3557,8 @@ async def query_api_keys( You can optionally filter the results with a query.

    To use this API, you must have at least the manage_own_api_key or the read_security cluster privileges. If you have only the manage_own_api_key privilege, this API returns only the API keys that you own. - If you have the read_security, manage_api_key, or greater privileges (including manage_security), this API returns all API keys regardless of ownership.

    + If you have the read_security, manage_api_key, or greater privileges (including manage_security), this API returns all API keys regardless of ownership. + Refer to the linked documentation for examples of how to find API keys:

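    A short sketch of a filtered query; the term filter and page size are illustrative only:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.security.query_api_keys(
                query={"term": {"invalidated": False}},  # example filter on a queryable field
                size=50,
            )
            print(resp)
            await client.close()

        asyncio.run(main())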
    ``_ @@ -4466,6 +4471,7 @@ async def update_cross_cluster_api_key(

    This API supports updates to an API key's access scope, metadata, and expiration. The owner user's information, such as the username and realm, is also updated automatically on every call.

    NOTE: This API cannot update REST API keys, which should be updated by either the update API key or bulk update API keys API.

    +

    To learn more about how to use this API, refer to the Update cross cluster API key API examples page.

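    A hedged sketch of an update call; the API key ID is a placeholder and the access structure shown is an assumption meant only to illustrate the shape of the request:

        import asyncio
        from elasticsearch import AsyncElasticsearch

        async def main() -> None:
            client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
            resp = await client.security.update_cross_cluster_api_key(
                id="<api-key-id>",  # placeholder
                access={"search": [{"names": ["logs-*"]}]},  # assumed access structure
                metadata={"application": "search-only"},
            )
            print(resp)
            await client.close()

        asyncio.run(main())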
    ``_ diff --git a/elasticsearch/_async/client/snapshot.py b/elasticsearch/_async/client/snapshot.py index ecead9049..c88d48d4a 100644 --- a/elasticsearch/_async/client/snapshot.py +++ b/elasticsearch/_async/client/snapshot.py @@ -544,6 +544,28 @@ async def get( ], ] ] = None, + state: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "FAILED", + "INCOMPATIBLE", + "IN_PROGRESS", + "PARTIAL", + "SUCCESS", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "FAILED", "INCOMPATIBLE", "IN_PROGRESS", "PARTIAL", "SUCCESS" + ], + ], + ] + ] = None, verbose: t.Optional[bool] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -602,6 +624,8 @@ async def get( all snapshots without an SLM policy. :param sort: The sort order for the result. The default behavior is sorting by snapshot start time stamp. + :param state: Only return snapshots with a state found in the given comma-separated + list of snapshot states. The default is all snapshot states. :param verbose: If `true`, returns additional information about each snapshot such as the version of Elasticsearch which took the snapshot, the start and end times of the snapshot, and the number of shards snapshotted. NOTE: The @@ -651,6 +675,8 @@ async def get( __query["slm_policy_filter"] = slm_policy_filter if sort is not None: __query["sort"] = sort + if state is not None: + __query["state"] = state if verbose is not None: __query["verbose"] = verbose __headers = {"accept": "application/json"} @@ -749,20 +775,28 @@ async def repository_analyze( """ .. raw:: html -

    Analyze a snapshot repository. - Analyze the performance characteristics and any incorrect behaviour found in a repository.

    -

    The response exposes implementation details of the analysis which may change from version to version. - The response body format is therefore not considered stable and may be different in newer versions.

    +

    Analyze a snapshot repository.

    +

    Performs operations on a snapshot repository in order to check for incorrect behaviour.

    There are a large number of third-party storage systems available, not all of which are suitable for use as a snapshot repository by Elasticsearch. - Some storage systems behave incorrectly, or perform poorly, especially when accessed concurrently by multiple clients as the nodes of an Elasticsearch cluster do. This API performs a collection of read and write operations on your repository which are designed to detect incorrect behaviour and to measure the performance characteristics of your storage system.

    + Some storage systems behave incorrectly, or perform poorly, especially when accessed concurrently by multiple clients as the nodes of an Elasticsearch cluster do. + This API performs a collection of read and write operations on your repository which are designed to detect incorrect behaviour and to measure the performance characteristics of your storage system.

    The default values for the parameters are deliberately low to reduce the impact of running an analysis inadvertently and to provide a sensible starting point for your investigations. Run your first analysis with the default parameter values to check for simple problems. - If successful, run a sequence of increasingly large analyses until you encounter a failure or you reach a blob_count of at least 2000, a max_blob_size of at least 2gb, a max_total_data_size of at least 1tb, and a register_operation_count of at least 100. + Some repositories may behave correctly when lightly loaded but incorrectly under production-like workloads. + If the first analysis is successful, run a sequence of increasingly large analyses until you encounter a failure or you reach a blob_count of at least 2000, a max_blob_size of at least 2gb, a max_total_data_size of at least 1tb, and a register_operation_count of at least 100. Always specify a generous timeout, possibly 1h or longer, to allow time for each analysis to run to completion. + Some repositories may behave correctly when accessed by a small number of Elasticsearch nodes but incorrectly when accessed concurrently by a production-scale cluster. Perform the analyses using a multi-node cluster of a similar size to your production cluster so that it can detect any problems that only arise when the repository is accessed by many nodes at once.
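The following sketch shows how an analysis with modestly increased parameters might be started from the Python client (the repository name, sizes, and counts are illustrative assumptions; the async client offers the same method):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

# Start with the defaults, then grow blob_count and max_blob_size across runs.
resp = client.snapshot.repository_analyze(
    name="my_repository",          # hypothetical repository name
    blob_count=200,
    max_blob_size="50mb",
    register_operation_count=20,
    timeout="2h",                  # be generous so the analysis can finish
)
print(resp)                        # details of the testing process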

    If the analysis fails, Elasticsearch detected that your repository behaved unexpectedly. This usually means you are using a third-party storage system with an incorrect or incompatible implementation of the API it claims to support. If so, this storage system is not suitable for use as a snapshot repository. + Repository analysis triggers conditions that occur only rarely when taking snapshots in a production system. + Snapshotting to unsuitable storage may appear to work correctly most of the time despite repository analysis failures. + However your snapshot data is at risk if you store it in a snapshot repository that does not reliably pass repository analysis. + You can demonstrate that the analysis failure is due to an incompatible storage implementation by verifying that Elasticsearch does not detect the same problem when analysing the reference implementation of the storage protocol you are using. + For instance, if you are using storage that offers an API which the supplier claims to be compatible with AWS S3, verify that repositories in AWS S3 do not fail repository analysis. + This allows you to demonstrate to your storage supplier that a repository analysis failure must only be caused by an incompatibility with AWS S3 and cannot be attributed to a problem in Elasticsearch. + Please do not report Elasticsearch issues involving third-party storage systems unless you can demonstrate that the same issue exists when analysing a repository that uses the reference implementation of the same storage protocol. You will need to work with the supplier of your storage system to address the incompatibilities that Elasticsearch detects.

    If the analysis is successful, the API returns details of the testing process, optionally including how long each operation took. You can use this information to determine the performance of your storage system. @@ -790,14 +824,17 @@ async def repository_analyze( This consumes bandwidth on the network between the cluster and the repository, and storage space and I/O bandwidth on the repository itself. You must ensure this load does not affect other users of these systems. Analyses respect the repository settings max_snapshot_bytes_per_sec and max_restore_bytes_per_sec if available and the cluster setting indices.recovery.max_bytes_per_sec which you can use to limit the bandwidth they consume.

    -

    NOTE: This API is intended for exploratory use by humans. You should expect the request parameters and the response format to vary in future versions.

    +

    NOTE: This API is intended for exploratory use by humans. + You should expect the request parameters and the response format to vary in future versions. + The response exposes implementation details of the analysis which may change from version to version.

    NOTE: Different versions of Elasticsearch may perform different checks for repository compatibility, with newer versions typically being stricter than older ones. A storage system that passes repository analysis with one version of Elasticsearch may fail with a different version. This indicates it behaves incorrectly in ways that the former version did not detect. You must work with the supplier of your storage system to address the incompatibilities detected by the repository analysis API in any version of Elasticsearch.

    NOTE: This API may not work correctly in a mixed-version cluster.

    Implementation details

    -

    NOTE: This section of documentation describes how the repository analysis API works in this version of Elasticsearch, but you should expect the implementation to vary between versions. The request parameters and response format depend on details of the implementation so may also be different in newer versions.

    +

    NOTE: This section of documentation describes how the repository analysis API works in this version of Elasticsearch, but you should expect the implementation to vary between versions. + The request parameters and response format depend on details of the implementation so may also be different in newer versions.

    The analysis comprises a number of blob-level tasks, as set by the blob_count parameter and a number of compare-and-exchange operations on linearizable registers, as set by the register_operation_count parameter. These tasks are distributed over the data and master-eligible nodes in the cluster for execution.

    For most blob-level tasks, the executing node first writes a blob to the repository and then instructs some of the other nodes in the cluster to attempt to read the data it just wrote. @@ -1223,6 +1260,11 @@ async def status(

    If you omit the <snapshot> request path parameter, the request retrieves information only for currently running snapshots. This usage is preferred. If needed, you can specify <repository> and <snapshot> to retrieve information for specific snapshots, even if they're not currently running.

    +

    Note that the stats will not be available for any shard snapshots in an ongoing snapshot completed by a node that (even momentarily) left the cluster. + Loading the stats from the repository is an expensive operation (see the WARNING below). + Therefore the stats values for such shards will be -1 even though the "stage" value will be "DONE", in order to minimize latency. + A "description" field will be present for a shard snapshot completed by a departed node explaining why the shard snapshot's stats results are invalid. + Consequently, the total stats for the index will be less than expected due to the missing values from these shards.

    WARNING: Using the API to return the status of any snapshots other than currently running snapshots can be expensive. The API requires a read from the repository for each shard in each snapshot. For example, if you have 100 snapshots with 1,000 shards each, an API request that includes all snapshots will require 100,000 reads (100 snapshots x 1,000 shards).
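As a rough illustration of how to keep this cheap from the Python client (the repository name is a placeholder; the async client mirrors both calls): report status only for currently running snapshots, and use the new state filter on the get-snapshot API instead of loading per-shard status for every snapshot.

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

# Preferred usage: omit <snapshot> to report only currently running snapshots.
running = client.snapshot.status(repository="my_repository")

# The new `state` parameter filters snapshots without reading per-shard stats.
partial = client.snapshot.get(
    repository="my_repository",
    snapshot="*",
    state="PARTIAL",
)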

    diff --git a/elasticsearch/_async/client/synonyms.py b/elasticsearch/_async/client/synonyms.py index 26b248a35..2466dfb6c 100644 --- a/elasticsearch/_async/client/synonyms.py +++ b/elasticsearch/_async/client/synonyms.py @@ -90,6 +90,7 @@ async def delete_synonym_rule( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + refresh: t.Optional[bool] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html @@ -102,6 +103,9 @@ async def delete_synonym_rule( :param set_id: The ID of the synonym set to update. :param rule_id: The ID of the synonym rule to delete. + :param refresh: If `true`, the request will refresh the analyzers with the deleted + synonym rule and wait for the new synonyms to be available before returning. + If `false`, analyzers will not be reloaded with the deleted synonym rule """ if set_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'set_id'") @@ -121,6 +125,8 @@ async def delete_synonym_rule( __query["human"] = human if pretty is not None: __query["pretty"] = pretty + if refresh is not None: + __query["refresh"] = refresh __headers = {"accept": "application/json"} return await self.perform_request( # type: ignore[return-value] "DELETE", @@ -299,6 +305,7 @@ async def put_synonym( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + refresh: t.Optional[bool] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -309,12 +316,16 @@ async def put_synonym( If you need to manage more synonym rules, you can create multiple synonym sets.

    When an existing synonyms set is updated, the search analyzers that use the synonyms set are reloaded automatically for all indices. This is equivalent to invoking the reload search analyzers API for all indices that use the synonyms set.

    +

    For practical examples of how to create or update a synonyms set, refer to the External documentation.
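A minimal sketch of creating a synonyms set with the new refresh flag (the set ID and rules are made up; the async client exposes the same method):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.synonyms.put_synonym(
    id="my-synonyms-set",          # hypothetical synonyms set ID
    synonyms_set=[
        {"id": "greeting", "synonyms": "hello, hi, howdy"},
    ],
    refresh=True,  # wait for the analyzers to reload before returning
)
print(resp["result"])  # "created" or "updated"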

    ``_ :param id: The ID of the synonyms set to be created or updated. :param synonyms_set: The synonym rules definitions for the synonyms set. + :param refresh: If `true`, the request will refresh the analyzers with the new + synonyms set and wait for the new synonyms to be available before returning. + If `false`, analyzers will not be reloaded with the new synonym set """ if id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'id'") @@ -332,6 +343,8 @@ async def put_synonym( __query["human"] = human if pretty is not None: __query["pretty"] = pretty + if refresh is not None: + __query["refresh"] = refresh if not __body: if synonyms_set is not None: __body["synonyms_set"] = synonyms_set @@ -359,6 +372,7 @@ async def put_synonym_rule( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + refresh: t.Optional[bool] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -376,6 +390,9 @@ async def put_synonym_rule( :param rule_id: The ID of the synonym rule to be updated or created. :param synonyms: The synonym rule information definition, which must be in Solr format. + :param refresh: If `true`, the request will refresh the analyzers with the new + synonym rule and wait for the new synonyms to be available before returning. + If `false`, analyzers will not be reloaded with the new synonym rule """ if set_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'set_id'") @@ -398,6 +415,8 @@ async def put_synonym_rule( __query["human"] = human if pretty is not None: __query["pretty"] = pretty + if refresh is not None: + __query["refresh"] = refresh if not __body: if synonyms is not None: __body["synonyms"] = synonyms diff --git a/elasticsearch/_async/client/tasks.py b/elasticsearch/_async/client/tasks.py index e03f0d363..96230cc4c 100644 --- a/elasticsearch/_async/client/tasks.py +++ b/elasticsearch/_async/client/tasks.py @@ -36,7 +36,7 @@ class TasksClient(NamespacedClient): async def cancel( self, *, - task_id: t.Optional[t.Union[int, str]] = None, + task_id: t.Optional[str] = None, actions: t.Optional[t.Union[str, t.Sequence[str]]] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, diff --git a/elasticsearch/_async/client/watcher.py b/elasticsearch/_async/client/watcher.py index 30f69d0e7..e0b7b39ec 100644 --- a/elasticsearch/_async/client/watcher.py +++ b/elasticsearch/_async/client/watcher.py @@ -45,7 +45,8 @@ async def ack_watch(

    IMPORTANT: If the specified watch is currently being executed, this API will return an error. The reason for this behavior is to prevent overwriting the watch status from a watch execution.

    Acknowledging an action throttles further executions of that action until its ack.state is reset to awaits_successful_execution. - This happens when the condition of the watch is not met (the condition evaluates to false).

    + This happens when the condition of the watch is not met (the condition evaluates to false). + To demonstrate how throttling works in practice and how it can be configured for individual actions within a watch, refer to External documentation.
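For instance, acknowledging a single action of a watch could look like this sketch with the Python client (the watch and action IDs are placeholders; use await with the async client):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.watcher.ack_watch(
    watch_id="my_watch",       # hypothetical watch ID
    action_id="email_admin",   # acknowledge only this action
)
# The action stays throttled until its ack.state returns to
# awaits_successful_execution, i.e. after the condition evaluates to false.
print(resp["status"]["actions"]["email_admin"]["ack"]["state"])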

    ``_ @@ -274,7 +275,8 @@ async def execute_watch( This serves as a great tool for testing and debugging your watches prior to adding them to Watcher.

    When Elasticsearch security features are enabled on your cluster, watches are run with the privileges of the user that stored the watches. If your user is allowed to read index a, but not index b, then the exact same set of rules will apply during execution of a watch.

    -

    When using the run watch API, the authorization data of the user that called the API will be used as a base, instead of the information who stored the watch.

    +

    + When using the run watch API, the authorization data of the user that called the API will be used as a base, instead of the information of the user who stored the watch. + Refer to the external documentation for examples of watch execution requests, including existing, customized, and inline watches.
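A hedged sketch of running a watch on demand via the Python client (the watch ID is hypothetical; the execution uses the caller's authorization data as described above):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.watcher.execute_watch(
    id="my_watch",           # hypothetical existing watch
    ignore_condition=True,   # force the actions to run for this invocation
    record_execution=False,  # do not write this run to the watch history
)
print(resp["watch_record"]["state"])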

    ``_ diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py index cace6bd52..56e473bac 100644 --- a/elasticsearch/_sync/client/__init__.py +++ b/elasticsearch/_sync/client/__init__.py @@ -635,6 +635,8 @@ def bulk( Imagine a _bulk?refresh=wait_for request with three documents in it that happen to be routed to different shards in an index with five shards. The request will only wait for those three shards to refresh. The other two shards that make up the index do not participate in the _bulk request at all.

    +

    You might want to disable the refresh interval temporarily to improve indexing throughput for large bulk requests. + Refer to the linked documentation for step-by-step instructions using the index settings API.
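A sketch of that workflow with the synchronous Python client (the index name and documents are placeholders): disable refresh_interval through the index settings API, run the bulk request, then reset the setting.

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

# Temporarily disable refresh on the target index.
client.indices.put_settings(index="my-index", settings={"index": {"refresh_interval": "-1"}})

client.bulk(operations=[
    {"index": {"_index": "my-index", "_id": "1"}},
    {"title": "hello"},
    {"index": {"_index": "my-index", "_id": "2"}},
    {"title": "world"},
])

# Setting the value back to null restores the default refresh behaviour.
client.indices.put_settings(index="my-index", settings={"index": {"refresh_interval": None}})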

    ``_ @@ -1643,7 +1645,7 @@ def delete_by_query( def delete_by_query_rethrottle( self, *, - task_id: t.Union[int, str], + task_id: str, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, @@ -2322,7 +2324,7 @@ def get( :param index: The name of the index that contains the document. :param id: A unique document identifier. :param force_synthetic_source: Indicates whether the request forces synthetic - `_source`. Use this paramater to test if the mapping supports synthetic `_source` + `_source`. Use this parameter to test if the mapping supports synthetic `_source` and to get a sense of the worst case performance. Fetches with this parameter enabled will be slower than enabling synthetic source natively in the index. :param preference: The node or shard the operation should be performed on. By @@ -2353,8 +2355,8 @@ def get( :param stored_fields: A comma-separated list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. If this field is specified, the `_source` parameter defaults to - `false`. Only leaf fields can be retrieved with the `stored_field` option. - Object fields can't be returned;​if specified, the request fails. + `false`. Only leaf fields can be retrieved with the `stored_fields` option. + Object fields can't be returned; if specified, the request fails. :param version: The version number for concurrency control. It must match the current version of the document for the request to succeed. :param version_type: The version type. @@ -3584,8 +3586,7 @@ def open_point_in_time( :param expand_wildcards: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated - values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, - `hidden`, `none`. + values, such as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param index_filter: Filter indices if the provided query rewrites to `match_none` @@ -3885,110 +3886,7 @@ def reindex( In this case, the response includes a count of the version conflicts that were encountered. Note that the handling of other error types is unaffected by the conflicts property. Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than max_docs until it has successfully indexed max_docs documents into the target or it has gone through every document in the source query.

    -

    NOTE: The reindex API makes no effort to handle ID collisions. - The last document written will "win" but the order isn't usually predictable so it is not a good idea to rely on this behavior. - Instead, make sure that IDs are unique by using a script.

    -

    Running reindex asynchronously

    -

    If the request contains wait_for_completion=false, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task. - Elasticsearch creates a record of this task as a document at _tasks/<task_id>.

    -

    Reindex from multiple sources

    -

    If you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources. - That way you can resume the process if there are any errors by removing the partially completed source and starting over. - It also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.

    -

    For example, you can use a bash script like this:

    -
    for index in i1 i2 i3 i4 i5; do
    -            curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{
    -              "source": {
    -                "index": "'$index'"
    -              },
    -              "dest": {
    -                "index": "'$index'-reindexed"
    -              }
    -            }'
    -          done
    -          
    -

    Throttling

    -

    Set requests_per_second to any positive decimal number (1.4, 6, 1000, for example) to throttle the rate at which reindex issues batches of index operations. - Requests are throttled by padding each batch with a wait time. - To turn off throttling, set requests_per_second to -1.

    -

    The throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding. - The padding time is the difference between the batch size divided by the requests_per_second and the time spent writing. - By default the batch size is 1000, so if requests_per_second is set to 500:

    -
    target_time = 1000 / 500 per second = 2 seconds
    -          wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
    -          
    -

    Since the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set. - This is "bursty" instead of "smooth".

    -

    Slicing

    -

    Reindex supports sliced scroll to parallelize the reindexing process. - This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.

    -

    NOTE: Reindexing from remote clusters does not support manual or automatic slicing.

    -

    You can slice a reindex request manually by providing a slice ID and total number of slices to each request. - You can also let reindex automatically parallelize by using sliced scroll to slice on _id. - The slices parameter specifies the number of slices to use.

    -

    Adding slices to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:

    -
      -
    • You can see these requests in the tasks API. These sub-requests are "child" tasks of the task for the request with slices.
    • -
    • Fetching the status of the task for the request with slices only contains the status of completed slices.
    • -
    • These sub-requests are individually addressable for things like cancellation and rethrottling.
    • -
    • Rethrottling the request with slices will rethrottle the unfinished sub-request proportionally.
    • -
    • Canceling the request with slices will cancel each sub-request.
    • -
    • Due to the nature of slices, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.
    • -
    • Parameters like requests_per_second and max_docs on a request with slices are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using max_docs with slices might not result in exactly max_docs documents being reindexed.
    • -
    • Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time.
    • -
    -

    If slicing automatically, setting slices to auto will choose a reasonable number for most indices. - If slicing manually or otherwise tuning automatic slicing, use the following guidelines.

    -

    Query performance is most efficient when the number of slices is equal to the number of shards in the index. - If that number is large (for example, 500), choose a lower number as too many slices will hurt performance. - Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.

    -

    Indexing performance scales linearly across available resources with the number of slices.

    -

    Whether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.

    -

    Modify documents during reindexing

    -

    Like _update_by_query, reindex operations support a script that modifies the document. - Unlike _update_by_query, the script is allowed to modify the document's metadata.

    -

    Just as in _update_by_query, you can set ctx.op to change the operation that is run on the destination. - For example, set ctx.op to noop if your script decides that the document doesn’t have to be indexed in the destination. This "no operation" will be reported in the noop counter in the response body. - Set ctx.op to delete if your script decides that the document must be deleted from the destination. - The deletion will be reported in the deleted counter in the response body. - Setting ctx.op to anything else will return an error, as will setting any other field in ctx.

    -

    Think of the possibilities! Just be careful; you are able to change:

    -
      -
    • _id
    • -
    • _index
    • -
    • _version
    • -
    • _routing
    • -
    -

    Setting _version to null or clearing it from the ctx map is just like not sending the version in an indexing request. - It will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.

    -

    Reindex from remote

    -

    Reindex supports reindexing from a remote Elasticsearch cluster. - The host parameter must contain a scheme, host, port, and optional path. - The username and password parameters are optional and when they are present the reindex operation will connect to the remote Elasticsearch node using basic authentication. - Be sure to use HTTPS when using basic authentication or the password will be sent in plain text. - There are a range of settings available to configure the behavior of the HTTPS connection.

    -

    When using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key. - Remote hosts must be explicitly allowed with the reindex.remote.whitelist setting. - It can be set to a comma delimited list of allowed remote host and port combinations. - Scheme is ignored; only the host and port are used. - For example:

    -
    reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*"]
    -          
    -

    The list of allowed hosts must be configured on any nodes that will coordinate the reindex. - This feature should work with remote clusters of any version of Elasticsearch. - This should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.

    -

    WARNING: Elasticsearch does not support forward compatibility across major versions. - For example, you cannot reindex from a 7.x cluster into a 6.x cluster.

    -

    To enable queries sent to older versions of Elasticsearch, the query parameter is sent directly to the remote host without validation or modification.

    -

    NOTE: Reindexing from remote clusters does not support manual or automatic slicing.

    -

    Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100mb. - If the remote index includes very large documents you'll need to use a smaller batch size. - It is also possible to set the socket read timeout on the remote connection with the socket_timeout field and the connection timeout with the connect_timeout field. - Both default to 30 seconds.

    -

    Configuring SSL parameters

    -

    Reindex from remote supports configurable SSL settings. - These must be specified in the elasticsearch.yml file, with the exception of the secure settings, which you add in the Elasticsearch keystore. - It is not possible to configure SSL in the body of the reindex request.

    +

    Refer to the linked documentation for examples of how to reindex documents.

    ``_ @@ -4650,11 +4548,11 @@ def search( of the specified nodes are available, select shards from any available node using the default method. * `_prefer_nodes:,` to if possible, run the search on the specified nodes IDs. If not, select shards using the - default method. `_shards:,` to run the search only on the specified - shards. You can combine this value with other `preference` values. However, - the `_shards` value must come first. For example: `_shards:2,3|_local`. `` - (any string that does not start with `_`) to route searches with the same - `` to the same shards in the same order. + default method. * `_shards:,` to run the search only on the + specified shards. You can combine this value with other `preference` values. + However, the `_shards` value must come first. For example: `_shards:2,3|_local`. + * `` (any string that does not start with `_`) to route searches + with the same `` to the same shards in the same order. :param profile: Set to `true` to return detailed timing information about the execution of individual components in a search request. NOTE: This is a debugging tool and adds significant overhead to search execution. @@ -4990,51 +4888,6 @@ def search_mvt(
  • Optionally, a geo_bounds aggregation on the <field>. The search only includes this aggregation if the exact_bounds parameter is true.
  • If the optional parameter with_labels is true, the internal search will include a dynamic runtime field that calls the getLabelPosition function of the geometry doc value. This enables the generation of new point features containing suggested geometry labels, so that, for example, multi-polygons will have only one label.
  • -

    For example, Elasticsearch may translate a vector tile search API request with a grid_agg argument of geotile and an exact_bounds argument of true into the following search

    -
    GET my-index/_search
    -          {
    -            "size": 10000,
    -            "query": {
    -              "geo_bounding_box": {
    -                "my-geo-field": {
    -                  "top_left": {
    -                    "lat": -40.979898069620134,
    -                    "lon": -45
    -                  },
    -                  "bottom_right": {
    -                    "lat": -66.51326044311186,
    -                    "lon": 0
    -                  }
    -                }
    -              }
    -            },
    -            "aggregations": {
    -              "grid": {
    -                "geotile_grid": {
    -                  "field": "my-geo-field",
    -                  "precision": 11,
    -                  "size": 65536,
    -                  "bounds": {
    -                    "top_left": {
    -                      "lat": -40.979898069620134,
    -                      "lon": -45
    -                    },
    -                    "bottom_right": {
    -                      "lat": -66.51326044311186,
    -                      "lon": 0
    -                    }
    -                  }
    -                }
    -              },
    -              "bounds": {
    -                "geo_bounds": {
    -                  "field": "my-geo-field",
    -                  "wrap_longitude": false
    -                }
    -              }
    -            }
    -          }
    -          

    The API returns results as a binary Mapbox vector tile. Mapbox vector tiles are encoded as Google Protobufs (PBF). By default, the tile contains three layers:

      @@ -5289,6 +5142,7 @@ def search_mvt( Some cells may intersect more than one vector tile. To compute the H3 resolution for each precision, Elasticsearch compares the average density of hexagonal bins at each resolution with the average density of tile bins at each zoom level. Elasticsearch uses the H3 resolution that is closest to the corresponding geotile density.

      +

      Learn how to use the vector tile search API with practical examples in the Vector tile search examples guide.
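As a rough usage sketch (index, geo field, and tile coordinates are placeholders), a vector tile search can be issued from the Python client like this; the response body is the binary PBF tile:

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.search_mvt(
    index="my-index",
    field="my-geo-field",   # a geo_point or geo_shape field
    zoom=6, x=30, y=25,     # tile coordinates z/x/y
    grid_agg="geotile",
    grid_precision=8,
    exact_bounds=True,
)
with open("tile.pbf", "wb") as f:
    f.write(resp.body)      # binary Mapbox vector tile (PBF)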

      ``_ @@ -5478,7 +5332,7 @@ def search_shards( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param local: If `true`, the request retrieves information from the local node @@ -5590,8 +5444,7 @@ def search_template( :param expand_wildcards: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated - values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, - `hidden`, `none`. + values, such as `open,hidden`. :param explain: If `true`, returns detailed information about score calculation as part of each hit. If you specify both this and the `explain` query parameter, the API uses only the query parameter. @@ -5865,7 +5718,8 @@ def termvectors( The information is only retrieved for the shard the requested document resides in. The term and field statistics are therefore only useful as relative measures whereas the absolute numbers have no meaning in this context. By default, when requesting term vectors of artificial documents, a shard to get the statistics from is randomly selected. - Use routing only to hit a particular shard.

      + Use routing only to hit a particular shard. + Refer to the linked documentation for detailed examples of how to use this API.
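A minimal sketch of fetching term vectors for a stored document (the document ID, field, and routing value are hypothetical):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.termvectors(
    index="my-index",
    id="1",                 # hypothetical document ID
    fields=["title"],
    term_statistics=True,
    routing="user-42",      # only needed to target a specific shard
)
print(list(resp["term_vectors"]["title"]["terms"]))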

      ``_ @@ -6036,7 +5890,8 @@ def update(

    The document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation.

    The _source field must be enabled to use this API. - In addition to _source, you can access the following variables through the ctx map: _index, _type, _id, _version, _routing, and _now (the current timestamp).

    + In addition to _source, you can access the following variables through the ctx map: _index, _type, _id, _version, _routing, and _now (the current timestamp). + For usage examples such as partial updates, upserts, and scripted updates, see the External documentation.
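For example, a scripted partial update with an upsert might look like the following sketch (the index, ID, and script are illustrative, not part of this patch):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.update(
    index="my-index",
    id="1",
    script={
        "source": "ctx._source.counter += params.count",
        "params": {"count": 1},
    },
    upsert={"counter": 1},  # used when the document does not exist yet
)
print(resp["result"])       # "updated", "created", or "noop"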

    ``_ @@ -6229,6 +6084,24 @@ def update_by_query( A bulk update request is performed for each batch of matching documents. Any query or update failures cause the update by query request to fail and the failures are shown in the response. Any update requests that completed successfully still stick, they are not rolled back.

    +

    Refreshing shards

    +

    Specifying the refresh parameter refreshes all shards once the request completes. + This is different to the update API's refresh parameter, which causes only the shard + that received the request to be refreshed. Unlike the update API, it does not support + wait_for.

    +

    Running update by query asynchronously

    +

    If the request contains wait_for_completion=false, Elasticsearch + performs some preflight checks, launches the request, and returns a + task you can use to cancel or get the status of the task. + Elasticsearch creates a record of this task as a document at .tasks/task/${taskId}.

    +

    Waiting for active shards

    +

    wait_for_active_shards controls how many copies of a shard must be active + before proceeding with the request. See wait_for_active_shards + for details. timeout controls how long each write request waits for unavailable + shards to become available. Both work exactly the way they work in the + Bulk API. Update by query uses scrolled searches, so you can also + specify the scroll parameter to control how long it keeps the search context + alive, for example ?scroll=10m. The default is 5 minutes.
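A sketch of launching an update by query as a background task and polling it through the tasks API (the query and script are placeholders):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.update_by_query(
    index="my-index",
    query={"term": {"user.id": "kimchy"}},            # hypothetical query
    script={"source": "ctx._source.flagged = true"},
    wait_for_completion=False,                        # run asynchronously
    wait_for_active_shards="all",
    scroll="10m",                                     # keep the search context alive longer
)
task_id = resp["task"]
print(client.tasks.get(task_id=task_id)["completed"])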

    Throttling update requests

    To control the rate at which update by query issues batches of update operations, you can set requests_per_second to any positive decimal number. This pads each batch with a wait time to throttle the rate. @@ -6263,18 +6136,8 @@ def update_by_query(

  • Query performance is most efficient when the number of slices is equal to the number of shards in the index or backing index. If that number is large (for example, 500), choose a lower number as too many slices hurts performance. Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.
  • Update performance scales linearly across available resources with the number of slices.
  • -

    Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources.

    -

    Update the document source

    -

    Update by query supports scripts to update the document source. - As with the update API, you can set ctx.op to change the operation that is performed.

    -

    Set ctx.op = "noop" if your script decides that it doesn't have to make any changes. - The update by query operation skips updating the document and increments the noop counter.

    -

    Set ctx.op = "delete" if your script decides that the document should be deleted. - The update by query operation deletes the document and increments the deleted counter.

    -

    Update by query supports only index, noop, and delete. - Setting ctx.op to anything else is an error. - Setting any other field in ctx is an error. - This API enables you to only modify the source of matching documents; you cannot move them.

    +

    Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources. + Refer to the linked documentation for examples of how to update documents using the _update_by_query API:
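As an illustrative example of the throttling and slicing options discussed above (the index, query, and script are placeholders):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint

resp = client.update_by_query(
    index="my-index",
    query={"range": {"age": {"gte": 30}}},               # hypothetical query
    script={"source": "ctx._source.segment = 'adult'"},
    conflicts="proceed",       # count version conflicts instead of failing
    requests_per_second=100,   # pad each batch with a wait to throttle
    slices="auto",             # parallelize with sliced scroll
)
print(resp["updated"], resp["noops"], resp["version_conflicts"])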

    ``_ @@ -6302,8 +6165,7 @@ def update_by_query( :param expand_wildcards: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated - values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, - `hidden`, `none`. + values, such as `open,hidden`. :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. diff --git a/elasticsearch/_sync/client/cat.py b/elasticsearch/_sync/client/cat.py index af14bf7f8..5fbf19f85 100644 --- a/elasticsearch/_sync/client/cat.py +++ b/elasticsearch/_sync/client/cat.py @@ -1767,7 +1767,200 @@ def nodes( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, full_id: t.Optional[t.Union[bool, str]] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "build", + "completion.size", + "cpu", + "disk.avail", + "disk.total", + "disk.used", + "disk.used_percent", + "fielddata.evictions", + "fielddata.memory_size", + "file_desc.current", + "file_desc.max", + "file_desc.percent", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "heap.current", + "heap.max", + "heap.percent", + "http_address", + "id", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "jdk", + "load_15m", + "load_1m", + "load_5m", + "mappings.total_count", + "mappings.total_estimated_overhead_in_bytes", + "master", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "name", + "node.role", + "pid", + "port", + "query_cache.evictions", + "query_cache.hit_count", + "query_cache.memory_size", + "query_cache.miss_count", + "ram.current", + "ram.max", + "ram.percent", + "refresh.time", + "refresh.total", + "request_cache.evictions", + "request_cache.hit_count", + "request_cache.memory_size", + "request_cache.miss_count", + "script.cache_evictions", + "script.compilations", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "shard_stats.total_count", + "suggest.current", + "suggest.time", + "suggest.total", + "uptime", + "version", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "build", + "completion.size", + "cpu", + "disk.avail", + "disk.total", + "disk.used", + "disk.used_percent", + "fielddata.evictions", + "fielddata.memory_size", + "file_desc.current", + "file_desc.max", + "file_desc.percent", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "heap.current", + "heap.max", + "heap.percent", + "http_address", + "id", + "indexing.delete_current", + 
"indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "jdk", + "load_15m", + "load_1m", + "load_5m", + "mappings.total_count", + "mappings.total_estimated_overhead_in_bytes", + "master", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "name", + "node.role", + "pid", + "port", + "query_cache.evictions", + "query_cache.hit_count", + "query_cache.memory_size", + "query_cache.miss_count", + "ram.current", + "ram.max", + "ram.percent", + "refresh.time", + "refresh.total", + "request_cache.evictions", + "request_cache.hit_count", + "request_cache.memory_size", + "request_cache.miss_count", + "script.cache_evictions", + "script.compilations", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "shard_stats.total_count", + "suggest.current", + "suggest.time", + "suggest.total", + "uptime", + "version", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, include_unloaded_segments: t.Optional[bool] = None, @@ -1794,16 +1987,17 @@ def nodes( to `text`, `json`, `cbor`, `yaml`, or `smile`. :param full_id: If `true`, return the full node ID. If `false`, return the shortened node ID. - :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. :param include_unloaded_segments: If true, the response includes information from segments that are not loaded into memory. - :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. - :param time: Unit used to display time values. + :param master_timeout: The period to wait for a connection to the master node. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. + :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. 
""" __path_parts: t.Dict[str, str] = {} @@ -2022,7 +2216,74 @@ def recovery( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "bytes", + "bytes_percent", + "bytes_recovered", + "bytes_total", + "files", + "files_percent", + "files_recovered", + "files_total", + "index", + "repository", + "shard", + "snapshot", + "source_host", + "source_node", + "stage", + "start_time", + "start_time_millis", + "stop_time", + "stop_time_millis", + "target_host", + "target_node", + "time", + "translog_ops", + "translog_ops_percent", + "translog_ops_recovered", + "type", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "bytes", + "bytes_percent", + "bytes_recovered", + "bytes_total", + "files", + "files_percent", + "files_recovered", + "files_total", + "index", + "repository", + "shard", + "snapshot", + "source_host", + "source_node", + "stage", + "start_time", + "start_time_millis", + "stop_time", + "stop_time_millis", + "target_host", + "target_node", + "time", + "translog_ops", + "translog_ops_percent", + "translog_ops_recovered", + "type", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, @@ -2053,13 +2314,14 @@ def recovery( shard recoveries. :param format: Specifies the format to return the columnar data in, can be set to `text`, `json`, `cbor`, `yaml`, or `smile`. - :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. - :param time: Unit used to display time values. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. + :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. """ __path_parts: t.Dict[str, str] @@ -2193,7 +2455,52 @@ def segments( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "committed", + "compound", + "docs.count", + "docs.deleted", + "generation", + "id", + "index", + "ip", + "prirep", + "searchable", + "segment", + "shard", + "size", + "size.memory", + "version", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "committed", + "compound", + "docs.count", + "docs.deleted", + "generation", + "id", + "index", + "ip", + "prirep", + "searchable", + "segment", + "shard", + "size", + "size.memory", + "version", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, local: t.Optional[bool] = None, @@ -2219,7 +2526,8 @@ def segments( :param bytes: The unit used to display byte values. :param format: Specifies the format to return the columnar data in, can be set to `text`, `json`, `cbor`, `yaml`, or `smile`. 
- :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. :param local: If `true`, the request computes the list of selected nodes from @@ -2227,9 +2535,9 @@ def segments( from the cluster state of the master node. In both cases the coordinating node will send requests for further information to each selected node. :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. :param v: When set to `true` will enable verbose output. """ __path_parts: t.Dict[str, str] @@ -2285,7 +2593,162 @@ def shards( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "completion.size", + "dataset.size", + "dense_vector.value_count", + "docs", + "dsparse_vector.value_count", + "fielddata.evictions", + "fielddata.memory_size", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "id", + "index", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "node", + "prirep", + "query_cache.evictions", + "query_cache.memory_size", + "recoverysource.type", + "refresh.time", + "refresh.total", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "seq_no.global_checkpoint", + "seq_no.local_checkpoint", + "seq_no.max", + "shard", + "state", + "store", + "suggest.current", + "suggest.time", + "suggest.total", + "sync_id", + "unassigned.at", + "unassigned.details", + "unassigned.for", + "unassigned.reason", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "completion.size", + "dataset.size", + "dense_vector.value_count", + "docs", + "dsparse_vector.value_count", + "fielddata.evictions", + "fielddata.memory_size", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "id", + "index", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + 
"indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "node", + "prirep", + "query_cache.evictions", + "query_cache.memory_size", + "recoverysource.type", + "refresh.time", + "refresh.total", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "seq_no.global_checkpoint", + "seq_no.local_checkpoint", + "seq_no.max", + "shard", + "state", + "store", + "suggest.current", + "suggest.time", + "suggest.total", + "sync_id", + "unassigned.at", + "unassigned.details", + "unassigned.for", + "unassigned.reason", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, @@ -2316,11 +2779,11 @@ def shards( :param h: List of columns to appear in the response. Supports simple wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. - :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. - :param time: Unit used to display time values. + :param master_timeout: The period to wait for a connection to the master node. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. + :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. 
""" __path_parts: t.Dict[str, str] @@ -2373,7 +2836,124 @@ def snapshots( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "build", + "completion.size", + "cpu", + "disk.avail", + "disk.total", + "disk.used", + "disk.used_percent", + "fielddata.evictions", + "fielddata.memory_size", + "file_desc.current", + "file_desc.max", + "file_desc.percent", + "flush.total", + "flush.total_time", + "get.current", + "get.exists_time", + "get.exists_total", + "get.missing_time", + "get.missing_total", + "get.time", + "get.total", + "heap.current", + "heap.max", + "heap.percent", + "http_address", + "id", + "indexing.delete_current", + "indexing.delete_time", + "indexing.delete_total", + "indexing.index_current", + "indexing.index_failed", + "indexing.index_failed_due_to_version_conflict", + "indexing.index_time", + "indexing.index_total", + "ip", + "jdk", + "load_15m", + "load_1m", + "load_5m", + "mappings.total_count", + "mappings.total_estimated_overhead_in_bytes", + "master", + "merges.current", + "merges.current_docs", + "merges.current_size", + "merges.total", + "merges.total_docs", + "merges.total_size", + "merges.total_time", + "name", + "node.role", + "pid", + "port", + "query_cache.evictions", + "query_cache.hit_count", + "query_cache.memory_size", + "query_cache.miss_count", + "ram.current", + "ram.max", + "ram.percent", + "refresh.time", + "refresh.total", + "request_cache.evictions", + "request_cache.hit_count", + "request_cache.memory_size", + "request_cache.miss_count", + "script.cache_evictions", + "script.compilations", + "search.fetch_current", + "search.fetch_time", + "search.fetch_total", + "search.open_contexts", + "search.query_current", + "search.query_time", + "search.query_total", + "search.scroll_current", + "search.scroll_time", + "search.scroll_total", + "segments.count", + "segments.fixed_bitset_memory", + "segments.index_writer_memory", + "segments.memory", + "segments.version_map_memory", + "shard_stats.total_count", + "suggest.current", + "suggest.time", + "suggest.total", + "uptime", + "version", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "duration", + "end_epoch", + "end_time", + "failed_shards", + "id", + "indices", + "reason", + "repository", + "start_epoch", + "start_time", + "status", + "successful_shards", + "total_shards", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, @@ -2401,7 +2981,8 @@ def snapshots( If any repository fails during the request, Elasticsearch returns an error. :param format: Specifies the format to return the columnar data in, can be set to `text`, `json`, `cbor`, `yaml`, or `smile`. - :param h: List of columns to appear in the response. Supports simple wildcards. + :param h: A comma-separated list of columns names to display. It supports simple + wildcards. :param help: When set to `true` will output available columns. This option can't be combined with any other query string option. 
:param ignore_unavailable: If `true`, the response does not include information @@ -2648,7 +3229,62 @@ def thread_pool( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, format: t.Optional[str] = None, - h: t.Optional[t.Union[str, t.Sequence[str]]] = None, + h: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "active", + "completed", + "core", + "ephemeral_id", + "host", + "ip", + "keep_alive", + "largest", + "max", + "name", + "node_id", + "node_name", + "pid", + "pool_size", + "port", + "queue", + "queue_size", + "rejected", + "size", + "type", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "active", + "completed", + "core", + "ephemeral_id", + "host", + "ip", + "keep_alive", + "largest", + "max", + "name", + "node_id", + "node_name", + "pid", + "pool_size", + "port", + "queue", + "queue_size", + "rejected", + "size", + "type", + ], + ], + ] + ] = None, help: t.Optional[bool] = None, human: t.Optional[bool] = None, local: t.Optional[bool] = None, @@ -2682,10 +3318,10 @@ def thread_pool( the local cluster state. If `false` the list of selected nodes are computed from the cluster state of the master node. In both cases the coordinating node will send requests for further information to each selected node. - :param master_timeout: Period to wait for a connection to the master node. - :param s: List of columns that determine how the table should be sorted. Sorting - defaults to ascending and can be changed by setting `:asc` or `:desc` as - a suffix to the column name. + :param master_timeout: The period to wait for a connection to the master node. + :param s: A comma-separated list of column names or aliases that determines the + sort order. Sorting defaults to ascending and can be changed by setting `:asc` + or `:desc` as a suffix to the column name. :param time: The unit used to display time values. :param v: When set to `true` will enable verbose output. """ diff --git a/elasticsearch/_sync/client/cluster.py b/elasticsearch/_sync/client/cluster.py index f7b1269cb..611927eb9 100644 --- a/elasticsearch/_sync/client/cluster.py +++ b/elasticsearch/_sync/client/cluster.py @@ -51,7 +51,8 @@ def allocation_explain( Get explanations for shard allocations in the cluster. For unassigned shards, it provides an explanation for why the shard is unassigned. For assigned shards, it provides an explanation for why the shard is remaining on its current node and has not moved or rebalanced to another node. - This API can be very useful when attempting to diagnose why a shard is unassigned or why a shard continues to remain on its current node when you might expect otherwise.

    + This API can be very useful when attempting to diagnose why a shard is unassigned or why a shard continues to remain on its current node when you might expect otherwise. + Refer to the linked documentation for examples of how to troubleshoot allocation issues using this API.
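A minimal sketch of the troubleshooting flow described above, using the Python client. It assumes a cluster reachable at http://localhost:9200; the index name, shard number, and primary flag in the second call are placeholders.

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # assumed local cluster

# With no arguments, Elasticsearch explains the first unassigned shard it finds
# (the server returns an error if there are no unassigned shards).
explanation = client.cluster.allocation_explain()
print(explanation)

# To target a specific shard, pass index/shard/primary explicitly (placeholder values).
explanation = client.cluster.allocation_explain(index="my-index", shard=0, primary=False)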

    ``_ @@ -361,8 +362,8 @@ def get_settings( """ .. raw:: html -

    Get cluster-wide settings. - By default, it returns only settings that have been explicitly defined.

    +

    Get cluster-wide settings.

    +

    By default, it returns only settings that have been explicitly defined.
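A minimal sketch of reading those settings with the Python client; `client` is assumed to be a configured `Elasticsearch` instance, and the `flat_settings` and `include_defaults` flags are assumptions based on the cluster get-settings API.

# Only explicitly defined persistent and transient settings are returned by default.
explicit = client.cluster.get_settings()

# Flattened keys and server-side defaults can be requested as well (assumed flags).
everything = client.cluster.get_settings(flat_settings=True, include_defaults=True)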

    ``_ @@ -870,9 +871,9 @@ def put_settings( :param flat_settings: Return settings in flat format (default: false) :param master_timeout: Explicit operation timeout for connection to master node - :param persistent: + :param persistent: The settings that persist after the cluster restarts. :param timeout: Explicit operation timeout - :param transient: + :param transient: The settings that do not persist after the cluster restarts. """ __path_parts: t.Dict[str, str] = {} __path = "/_cluster/settings" diff --git a/elasticsearch/_sync/client/esql.py b/elasticsearch/_sync/client/esql.py index 0897feb54..987b0868f 100644 --- a/elasticsearch/_sync/client/esql.py +++ b/elasticsearch/_sync/client/esql.py @@ -31,6 +31,8 @@ class EsqlClient(NamespacedClient): "columnar", "filter", "include_ccs_metadata", + "keep_alive", + "keep_on_completion", "locale", "params", "profile", @@ -88,7 +90,9 @@ def async_query( parameter, runs it, and returns the results. :param allow_partial_results: If `true`, partial results will be returned if there are shard failures, but the query can continue to execute on other - clusters and shards. + clusters and shards. If `false`, the query will fail if there are any failures. + To override the default behavior, you can set the `esql.query.allow_partial_results` + cluster setting to `false`. :param columnar: By default, ES|QL returns results as rows. For example, FROM returns each individual document as one row. For the JSON, YAML, CBOR and smile formats, ES|QL can return the results in a columnar fashion where one @@ -151,10 +155,6 @@ def async_query( __query["format"] = format if human is not None: __query["human"] = human - if keep_alive is not None: - __query["keep_alive"] = keep_alive - if keep_on_completion is not None: - __query["keep_on_completion"] = keep_on_completion if pretty is not None: __query["pretty"] = pretty if not __body: @@ -166,6 +166,10 @@ def async_query( __body["filter"] = filter if include_ccs_metadata is not None: __body["include_ccs_metadata"] = include_ccs_metadata + if keep_alive is not None: + __body["keep_alive"] = keep_alive + if keep_on_completion is not None: + __body["keep_on_completion"] = keep_on_completion if locale is not None: __body["locale"] = locale if params is not None: @@ -248,6 +252,14 @@ def async_query_get( drop_null_columns: t.Optional[bool] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + format: t.Optional[ + t.Union[ + str, + t.Literal[ + "arrow", "cbor", "csv", "json", "smile", "tsv", "txt", "yaml" + ], + ] + ] = None, human: t.Optional[bool] = None, keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, pretty: t.Optional[bool] = None, @@ -273,6 +285,7 @@ def async_query_get( will be removed from the `columns` and `values` portion of the results. If `true`, the response will include an extra section under the name `all_columns` which has the name of all the columns. + :param format: A short version of the Accept header, for example `json` or `yaml`. :param keep_alive: The period for which the query and its results are stored in the cluster. When this period expires, the query and its results are deleted, even if the query is still ongoing. 
@@ -293,6 +306,8 @@ def async_query_get( __query["error_trace"] = error_trace if filter_path is not None: __query["filter_path"] = filter_path + if format is not None: + __query["format"] = format if human is not None: __query["human"] = human if keep_alive is not None: @@ -366,6 +381,87 @@ def async_query_stop( path_parts=__path_parts, ) + @_rewrite_parameters() + @_stability_warning(Stability.EXPERIMENTAL) + def get_query( + self, + *, + id: str, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Get information about a specific running ES|QL query. + Returns an object with extended information about a running ES|QL query.
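A minimal sketch, assuming a configured client named `client`; the query ID is a placeholder you would normally obtain from the list-queries endpoint described below.

# "my-query-id" is a hypothetical ID taken from a previous list_queries call.
details = client.esql.get_query(id="my-query-id")
print(details)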

    + + + :param id: The query ID + """ + if id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'id'") + __path_parts: t.Dict[str, str] = {"id": _quote(id)} + __path = f'/_query/queries/{__path_parts["id"]}' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="esql.get_query", + path_parts=__path_parts, + ) + + @_rewrite_parameters() + @_stability_warning(Stability.EXPERIMENTAL) + def list_queries( + self, + *, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Get information about running ES|QL queries. + Returns an object containing IDs and other information about the running ES|QL queries.
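A minimal sketch combining the two endpoints; the assumption that the response keys query IDs under a top-level "queries" object is based on the description above and may differ by server version.

running = client.esql.list_queries()

# Assumed response shape: {"queries": {"<query-id>": {...}, ...}}
for query_id in running["queries"]:
    print(query_id, client.esql.get_query(id=query_id))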

    + + """ + __path_parts: t.Dict[str, str] = {} + __path = "/_query/queries" + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="esql.list_queries", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=( "query", @@ -422,7 +518,9 @@ def query( parameter, runs it, and returns the results. :param allow_partial_results: If `true`, partial results will be returned if there are shard failures, but the query can continue to execute on other - clusters and shards. + clusters and shards. If `false`, the query will fail if there are any failures. + To override the default behavior, you can set the `esql.query.allow_partial_results` + cluster setting to `false`. :param columnar: By default, ES|QL returns results as rows. For example, FROM returns each individual document as one row. For the JSON, YAML, CBOR and smile formats, ES|QL can return the results in a columnar fashion where one diff --git a/elasticsearch/_sync/client/indices.py b/elasticsearch/_sync/client/indices.py index a63f0319f..512b7d73e 100644 --- a/elasticsearch/_sync/client/indices.py +++ b/elasticsearch/_sync/client/indices.py @@ -338,7 +338,7 @@ def clear_cache( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param fielddata: If `true`, clears the fields cache. Use the `fields` parameter to clear the cache of specific fields only. :param fields: Comma-separated list of field names used to limit the `fielddata` @@ -563,7 +563,7 @@ def close( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -656,7 +656,15 @@ def create( ``_ - :param index: Name of the index you wish to create. + :param index: Name of the index you wish to create. Index names must meet the + following criteria: * Lowercase only * Cannot include `\\`, `/`, `*`, `?`, + `"`, `<`, `>`, `|`, ` ` (space character), `,`, or `#` * Indices prior to + 7.0 could contain a colon (`:`), but that has been deprecated and will not + be supported in later versions * Cannot start with `-`, `_`, or `+` * Cannot + be `.` or `..` * Cannot be longer than 255 bytes (note thtat it is bytes, + so multi-byte characters will reach the limit faster) * Names starting with + `.` are deprecated, except for hidden indices and internal indices managed + by plugins :param aliases: Aliases for the index. :param mappings: Mapping for fields in the index. 
If specified, this mapping can include: - Field names - Field data types - Mapping parameters @@ -942,7 +950,7 @@ def delete( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -1173,6 +1181,71 @@ def delete_data_stream( path_parts=__path_parts, ) + @_rewrite_parameters() + def delete_data_stream_options( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Delete data stream options. + Removes the data stream options from a data stream.
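A minimal sketch, assuming a configured client and a placeholder data stream name.

# Remove the data stream options (for example, a failure store configuration) again.
client.indices.delete_data_stream_options(name="my-data-stream", timeout="30s")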

    + + + ``_ + + :param name: A comma-separated list of data streams of which the data stream + options will be deleted; use `*` to get all data streams + :param expand_wildcards: Whether wildcard expressions should get expanded to + open or closed indices (default: open) + :param master_timeout: Specify timeout for connection to master + :param timeout: Explicit timestamp for the document + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_options' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + __headers = {"accept": "application/json"} + return self.perform_request( # type: ignore[return-value] + "DELETE", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.delete_data_stream_options", + path_parts=__path_parts, + ) + @_rewrite_parameters() def delete_index_template( self, @@ -1246,7 +1319,8 @@ def delete_template( """ .. raw:: html -

    Delete a legacy index template.

    +

    Delete a legacy index template. + IMPORTANT: This documentation is about legacy index templates, which are deprecated and will be replaced by the composable templates introduced in Elasticsearch 7.8.

    ``_ @@ -1486,7 +1560,7 @@ def exists( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param flat_settings: If `true`, returns settings in flat format. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. @@ -1570,7 +1644,7 @@ def exists_alias( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, requests that include a missing data stream or index in the target indices or data streams return an error. :param master_timeout: Period to wait for a connection to the master node. If @@ -1919,7 +1993,7 @@ def flush( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param force: If `true`, the request forces a flush even if there are no changes to commit to the index. :param ignore_unavailable: If `false`, the request returns an error if it targets @@ -2237,7 +2311,7 @@ def get_alias( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -2317,8 +2391,7 @@ def get_data_lifecycle( wildcards (`*`). To target all data streams, omit this parameter or use `*` or `_all`. :param expand_wildcards: Type of data stream that wildcard patterns can match. - Supports comma-separated values, such as `open,hidden`. Valid values are: - `all`, `open`, `closed`, `hidden`, `none`. + Supports comma-separated values, such as `open,hidden`. :param include_defaults: If `true`, return all default settings in the response. :param master_timeout: Period to wait for a connection to the master node. 
If no response is received before the timeout expires, the request fails and @@ -2469,6 +2542,121 @@ def get_data_stream( path_parts=__path_parts, ) + @_rewrite_parameters() + def get_data_stream_options( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Get data stream options.

    +

    Get the data stream options configuration of one or more data streams.
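A minimal sketch, assuming a configured client; the data stream name is a placeholder.

options = client.indices.get_data_stream_options(name="my-data-stream")
print(options)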

    + + + ``_ + + :param name: Comma-separated list of data streams to limit the request. Supports + wildcards (`*`). To target all data streams, omit this parameter or use `*` + or `_all`. + :param expand_wildcards: Type of data stream that wildcard patterns can match. + Supports comma-separated values, such as `open,hidden`. + :param master_timeout: Period to wait for a connection to the master node. If + no response is received before the timeout expires, the request fails and + returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_options' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.get_data_stream_options", + path_parts=__path_parts, + ) + + @_rewrite_parameters() + def get_data_stream_settings( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Get data stream settings.

    +

    Get setting information for one or more data streams.
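A minimal sketch, assuming a configured client; the wildcard pattern is a placeholder.

# Wildcards are supported, so several data streams can be inspected at once.
settings = client.indices.get_data_stream_settings(name="logs-*")
print(settings)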

    + + + ``_ + + :param name: A comma-separated list of data streams or data stream patterns. + Supports wildcards (`*`). + :param master_timeout: The period to wait for a connection to the master node. + If no response is received before the timeout expires, the request fails + and returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_settings' + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + __headers = {"accept": "application/json"} + return self.perform_request( # type: ignore[return-value] + "GET", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.get_data_stream_settings", + path_parts=__path_parts, + ) + @_rewrite_parameters() def get_field_mapping( self, @@ -2513,7 +2701,7 @@ def get_field_mapping( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param include_defaults: If `true`, return all default settings in the response. @@ -2665,7 +2853,7 @@ def get_mapping( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param local: If `true`, the request retrieves information from the local node @@ -2875,7 +3063,7 @@ def get_template( """ .. raw:: html -

    Get index templates. +

    Get legacy index templates. Get information about one or more index templates.

    IMPORTANT: This documentation is about legacy index templates, which are deprecated and will be replaced by the composable templates introduced in Elasticsearch 7.8.

    @@ -3157,7 +3345,7 @@ def open( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param master_timeout: Period to wait for a connection to the master node. If @@ -3416,8 +3604,7 @@ def put_data_lifecycle( for this data stream. A data stream lifecycle that's disabled (enabled: `false`) will have no effect on the data stream. :param expand_wildcards: Type of data stream that wildcard patterns can match. - Supports comma-separated values, such as `open,hidden`. Valid values are: - `all`, `hidden`, `open`, `closed`, `none`. + Supports comma-separated values, such as `open,hidden`. :param master_timeout: Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error. @@ -3466,6 +3653,167 @@ def put_data_lifecycle( path_parts=__path_parts, ) + @_rewrite_parameters( + body_fields=("failure_store",), + ) + def put_data_stream_options( + self, + *, + name: t.Union[str, t.Sequence[str]], + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + failure_store: t.Optional[t.Mapping[str, t.Any]] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Update data stream options. + Update the data stream options of the specified data streams.
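A minimal sketch, assuming a configured client and a placeholder data stream name; the shape of the `failure_store` body ({"enabled": True}) is an assumption based on the parameter description and may need adjusting for your server version.

client.indices.put_data_stream_options(
    name="my-data-stream",
    failure_store={"enabled": True},  # assumed failure-store configuration shape
)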

    + + + ``_ + + :param name: Comma-separated list of data streams used to limit the request. + Supports wildcards (`*`). To target all data streams use `*` or `_all`. + :param expand_wildcards: Type of data stream that wildcard patterns can match. + Supports comma-separated values, such as `open,hidden`. + :param failure_store: If defined, it will update the failure store configuration + of every data stream resolved by the name expression. + :param master_timeout: Period to wait for a connection to the master node. If + no response is received before the timeout expires, the request fails and + returns an error. + :param timeout: Period to wait for a response. If no response is received before + the timeout expires, the request fails and returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_options' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + if not __body: + if failure_store is not None: + __body["failure_store"] = failure_store + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="indices.put_data_stream_options", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_name="settings", + ) + def put_data_stream_settings( + self, + *, + name: t.Union[str, t.Sequence[str]], + settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Mapping[str, t.Any]] = None, + dry_run: t.Optional[bool] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Update data stream settings.

    +

    This API can be used to override settings on specific data streams. These overrides will take precedence over what + is specified in the template that the data stream matches. To prevent your data stream from getting into an invalid state, + only certain settings are allowed. If possible, the setting change is applied to all + backing indices. Otherwise, it will be applied when the data stream is next rolled over.
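A minimal sketch, assuming a configured client and a placeholder data stream name. Whether a particular setting is accepted depends on the server-side allow-list mentioned above, so the sketch uses `dry_run` to preview the change first.

# Simulate the change and inspect what would happen before applying it for real.
preview = client.indices.put_data_stream_settings(
    name="my-data-stream",
    settings={"index.number_of_replicas": 1},  # assumed to be an allowed setting
    dry_run=True,
)
print(preview)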

    + + + ``_ + + :param name: A comma-separated list of data streams or data stream patterns. + :param settings: + :param dry_run: If `true`, the request does not actually change the settings + on any data streams or indices. Instead, it simulates changing the settings + and reports back to the user what would have happened had these settings + actually been applied. + :param master_timeout: The period to wait for a connection to the master node. + If no response is received before the timeout expires, the request fails + and returns an error. + :param timeout: The period to wait for a response. If no response is received + before the timeout expires, the request fails and returns an error. + """ + if name in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'name'") + if settings is None and body is None: + raise ValueError( + "Empty value passed for parameters 'settings' and 'body', one of them should be set." + ) + elif settings is not None and body is not None: + raise ValueError("Cannot set both 'settings' and 'body'") + __path_parts: t.Dict[str, str] = {"name": _quote(name)} + __path = f'/_data_stream/{__path_parts["name"]}/_settings' + __query: t.Dict[str, t.Any] = {} + if dry_run is not None: + __query["dry_run"] = dry_run + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + __body = settings if settings is not None else body + __headers = {"accept": "application/json", "content-type": "application/json"} + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="indices.put_data_stream_settings", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=( "allow_auto_create", @@ -3693,24 +4041,17 @@ def put_mapping(

    Update field mappings. Add new fields to an existing data stream or index. - You can also use this API to change the search settings of existing fields and add new properties to existing object fields. - For data streams, these changes are applied to all backing indices by default.

    -

    Add multi-fields to an existing field

    -

    Multi-fields let you index the same field in different ways. - You can use this API to update the fields mapping parameter and enable multi-fields for an existing field. - WARNING: If an index (or data stream) contains documents when you add a multi-field, those documents will not have values for the new multi-field. - You can populate the new multi-field with the update by query API.

    -

    Change supported mapping parameters for an existing field

    -

    The documentation for each mapping parameter indicates whether you can update it for an existing field using this API. - For example, you can use the update mapping API to update the ignore_above parameter.

    -

    Change the mapping of an existing field

    -

    Except for supported mapping parameters, you can't change the mapping or field type of an existing field. - Changing an existing field could invalidate data that's already indexed.

    -

    If you need to change the mapping of a field in a data stream's backing indices, refer to documentation about modifying data streams. - If you need to change the mapping of a field in other indices, create a new index with the correct mapping and reindex your data into that index.

    -

    Rename a field

    -

    Renaming a field would invalidate data already indexed under the old field name. - Instead, add an alias field to create an alternate field name.

    + You can use the update mapping API to:

    +
      +
    • Add a new field to an existing index
    • +
    • Update mappings for multiple indices in a single request
    • +
    • Add new properties to an object field
    • +
    • Enable multi-fields for an existing field
    • +
    • Update supported mapping parameters
    • +
    • Change a field's mapping using reindexing
    • +
    • Rename a field using a field alias
    • +
    +

    Learn how to use the update mapping API with practical examples in the Update mapping API examples guide.
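As a small illustration of the first bullet above (adding a new field), a hedged sketch using the Python client; the index and field names are placeholders, and the `properties` keyword is assumed from the update mapping request body.

# Add a new keyword field to an existing index without reindexing.
client.indices.put_mapping(
    index="my-index",
    properties={"tags": {"type": "keyword"}},
)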

    ``_ @@ -3729,7 +4070,7 @@ def put_mapping( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param field_names: Control whether field names are enabled for the index. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. @@ -3847,8 +4188,36 @@ def put_settings( Changes dynamic index settings in real time. For data streams, index setting changes are applied to all backing indices by default.

    To revert a setting to the default value, use a null value. - The list of per-index settings that can be updated dynamically on live indices can be found in index module documentation. + The list of per-index settings that can be updated dynamically on live indices can be found in index settings documentation. To preserve existing settings from being updated, set the preserve_existing parameter to true.

    +

    For performance optimization during bulk indexing, you can disable the refresh interval. + Refer to disable refresh interval for an example. + There are multiple valid ways to represent index settings in the request body. You can specify only the setting, for example:

    +
    {
    +            "number_of_replicas": 1
    +          }
    +          
    +

    Or you can use an index setting object:

    +
    {
    +            "index": {
    +              "number_of_replicas": 1
    +            }
    +          }
    +          
    +

    Or you can use dot notation:

    +
    {
    +            "index.number_of_replicas": 1
    +          }
    +          
    +

    Or you can embed any of the aforementioned options in a settings object. For example:

    +
    {
    +            "settings": {
    +              "index": {
    +                "number_of_replicas": 1
    +              }
    +            }
    +          }
    +          
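Any of the representations above can be passed as the Python client's `settings` body. A minimal sketch, assuming a configured client and a placeholder index name; the refresh-interval calls mirror the bulk-indexing note above, and a null value restores the default as described earlier.

# Equivalent to the JSON bodies shown above.
client.indices.put_settings(
    index="my-index",
    settings={"index": {"number_of_replicas": 1}},
)

# Temporarily disable refresh for heavy bulk indexing, then revert to the default.
client.indices.put_settings(index="my-index", settings={"index": {"refresh_interval": "-1"}})
client.indices.put_settings(index="my-index", settings={"index": {"refresh_interval": None}})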

    NOTE: You can only define new analyzers on closed indices. To add an analyzer, you must close the index, define the analyzer, and reopen the index. You cannot close the write index of a data stream. @@ -3856,7 +4225,8 @@ def put_settings( Then roll over the data stream to apply the new analyzer to the stream's write index and future backing indices. This affects searches and any new data added to the stream after the rollover. However, it does not affect the data stream's backing indices or their existing data. - To change the analyzer for existing backing indices, you must create a new data stream and reindex your data into it.

    + To change the analyzer for existing backing indices, you must create a new data stream and reindex your data into it. + Refer to updating analyzers on existing indices for step-by-step examples.

    ``_ @@ -3968,7 +4338,7 @@ def put_template( """ .. raw:: html -

    Create or update an index template. +

    Create or update a legacy index template. Index templates define settings, mappings, and aliases that can be applied automatically to new indices. Elasticsearch applies templates to new indices based on an index pattern that matches the index name.

    IMPORTANT: This documentation is about legacy index templates, which are deprecated and will be replaced by the composable templates introduced in Elasticsearch 7.8.

    @@ -4172,7 +4542,7 @@ def refresh( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. """ @@ -4288,6 +4658,105 @@ def reload_search_analyzers( path_parts=__path_parts, ) + @_rewrite_parameters() + def remove_block( + self, + *, + index: str, + block: t.Union[str, t.Literal["metadata", "read", "read_only", "write"]], + allow_no_indices: t.Optional[bool] = None, + error_trace: t.Optional[bool] = None, + expand_wildcards: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]] + ], + t.Union[str, t.Literal["all", "closed", "hidden", "none", "open"]], + ] + ] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + ignore_unavailable: t.Optional[bool] = None, + master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + pretty: t.Optional[bool] = None, + timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Remove an index block.

    +

    Remove an index block from an index. + Index blocks limit the operations allowed on an index by blocking specific operation types.
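A minimal sketch, assuming a configured client and a placeholder index name; the `block` value must be one of the literals in the signature above.

# Lift a previously applied write block so indexing can resume.
client.indices.remove_block(index="my-index", block="write")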

    + + + ``_ + + :param index: A comma-separated list or wildcard expression of index names used + to limit the request. By default, you must explicitly name the indices you + are removing blocks from. To allow the removal of blocks from indices with + `_all`, `*`, or other wildcard expressions, change the `action.destructive_requires_name` + setting to `false`. You can update this setting in the `elasticsearch.yml` + file or by using the cluster update settings API. + :param block: The block type to remove from the index. + :param allow_no_indices: If `false`, the request returns an error if any wildcard + expression, index alias, or `_all` value targets only missing or closed indices. + This behavior applies even if the request targets other open indices. For + example, a request targeting `foo*,bar*` returns an error if an index starts + with `foo` but no index starts with `bar`. + :param expand_wildcards: The type of index that wildcard patterns can match. + If the request can target data streams, this argument determines whether + wildcard expressions match hidden data streams. It supports comma-separated + values, such as `open,hidden`. + :param ignore_unavailable: If `false`, the request returns an error if it targets + a missing or closed index. + :param master_timeout: The period to wait for the master node. If the master + node is not available before the timeout expires, the request fails and returns + an error. It can also be set to `-1` to indicate that the request should + never timeout. + :param timeout: The period to wait for a response from all relevant nodes in + the cluster after updating the cluster metadata. If no response is received + before the timeout expires, the cluster metadata update still applies but + the response will indicate that it was not completely acknowledged. It can + also be set to `-1` to indicate that the request should never timeout. + """ + if index in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'index'") + if block in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'block'") + __path_parts: t.Dict[str, str] = { + "index": _quote(index), + "block": _quote(block), + } + __path = f'/{__path_parts["index"]}/_block/{__path_parts["block"]}' + __query: t.Dict[str, t.Any] = {} + if allow_no_indices is not None: + __query["allow_no_indices"] = allow_no_indices + if error_trace is not None: + __query["error_trace"] = error_trace + if expand_wildcards is not None: + __query["expand_wildcards"] = expand_wildcards + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if ignore_unavailable is not None: + __query["ignore_unavailable"] = ignore_unavailable + if master_timeout is not None: + __query["master_timeout"] = master_timeout + if pretty is not None: + __query["pretty"] = pretty + if timeout is not None: + __query["timeout"] = timeout + __headers = {"accept": "application/json"} + return self.perform_request( # type: ignore[return-value] + "DELETE", + __path, + params=__query, + headers=__headers, + endpoint_id="indices.remove_block", + path_parts=__path_parts, + ) + @_rewrite_parameters() def resolve_cluster( self, @@ -4371,10 +4840,9 @@ def resolve_cluster( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. 
Valid values are: `all`, `open`, `closed`, `hidden`, `none`. - NOTE: This option is only supported when specifying an index expression. - You will get an error if you specify index options to the `_resolve/cluster` - API endpoint that takes no index expression. + as `open,hidden`. NOTE: This option is only supported when specifying an + index expression. You will get an error if you specify index options to the + `_resolve/cluster` API endpoint that takes no index expression. :param ignore_throttled: If true, concrete, expanded, or aliased indices are ignored when frozen. NOTE: This option is only supported when specifying an index expression. You will get an error if you specify index options to @@ -4467,7 +4935,7 @@ def resolve_index( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. """ @@ -4681,7 +5149,7 @@ def segments( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. """ @@ -5505,7 +5973,7 @@ def validate_query( :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such - as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. + as `open,hidden`. :param explain: If `true`, the response returns detailed information if an error has occurred. :param ignore_unavailable: If `false`, the request returns an error if it targets diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py index 1826c5d51..a6ecbe958 100644 --- a/elasticsearch/_sync/client/inference.py +++ b/elasticsearch/_sync/client/inference.py @@ -370,22 +370,38 @@ def put( """ .. raw:: html -

    Create an inference endpoint. - When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    +

    Create an inference endpoint.

    IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.

    +

    The following integrations are available through the inference API. You can find the available task types next to the integration name:

    +
      +
    • AlibabaCloud AI Search (completion, rerank, sparse_embedding, text_embedding)
    • +
    • Amazon Bedrock (completion, text_embedding)
    • +
    • Anthropic (completion)
    • +
    • Azure AI Studio (completion, text_embedding)
    • +
    • Azure OpenAI (completion, text_embedding)
    • +
    • Cohere (completion, rerank, text_embedding)
    • +
    • DeepSeek (completion, chat_completion)
    • +
    • Elasticsearch (rerank, sparse_embedding, text_embedding - this service is for built-in models and models uploaded through Eland)
    • +
    • ELSER (sparse_embedding)
    • +
    • Google AI Studio (completion, text_embedding)
    • +
    • Google Vertex AI (rerank, text_embedding)
    • +
    • Hugging Face (chat_completion, completion, rerank, text_embedding)
    • +
    • Mistral (chat_completion, completion, text_embedding)
    • +
    • OpenAI (chat_completion, completion, text_embedding)
    • +
    • VoyageAI (text_embedding, rerank)
    • +
    • Watsonx inference integration (text_embedding)
    • +
    • JinaAI (text_embedding, rerank)
    • +
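A hedged sketch of the generic endpoint-creation call; the ELSER-style `inference_config` body is an assumption based on the integrations listed above, and all identifiers are placeholders.

client.inference.put(
    task_type="sparse_embedding",
    inference_id="my-elser-endpoint",   # placeholder endpoint name
    inference_config={                  # assumed body shape for the ELSER service
        "service": "elser",
        "service_settings": {"num_allocations": 1, "num_threads": 1},
    },
)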
    ``_ :param inference_id: The inference Id :param inference_config: - :param task_type: The task type + :param task_type: The task type. Refer to the integration list in the API description + for the available task types. """ if inference_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'inference_id'") @@ -458,11 +474,6 @@ def put_alibabacloud(

    Create an AlibabaCloud AI Search inference endpoint.

    Create an inference endpoint to perform an inference task with the alibabacloud-ai-search service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -553,16 +564,11 @@ def put_amazonbedrock( .. raw:: html

    Create an Amazon Bedrock inference endpoint.

    -

    Creates an inference endpoint to perform an inference task with the amazonbedrock service.

    +

    Create an inference endpoint to perform an inference task with the amazonbedrock service.

    info You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -654,11 +660,6 @@ def put_anthropic(

    Create an Anthropic inference endpoint.

    Create an inference endpoint to perform an inference task with the anthropic service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -751,11 +752,6 @@ def put_azureaistudio(

    Create an Azure AI studio inference endpoint.

    Create an inference endpoint to perform an inference task with the azureaistudio service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -853,11 +849,6 @@ def put_azureopenai(
  • GPT-3.5
  • The list of embeddings models that you can choose from in your deployment can be found in the Azure models documentation.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -951,11 +942,6 @@ def put_cohere(

    Create a Cohere inference endpoint.

    Create an inference endpoint to perform an inference task with the cohere service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1017,6 +1003,85 @@ def put_cohere( path_parts=__path_parts, ) + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_deepseek( + self, + *, + task_type: t.Union[str, t.Literal["chat_completion", "completion"]], + deepseek_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

    Create a DeepSeek inference endpoint.

    +

    Create an inference endpoint to perform an inference task with the deepseek service.
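A hedged sketch, assuming a configured client; the `service_settings` keys (`api_key`, `model_id`) are assumptions based on the DeepSeek service description, and the values shown are placeholders.

client.inference.put_deepseek(
    task_type="chat_completion",
    deepseek_inference_id="my-deepseek-endpoint",
    service="deepseek",
    service_settings={
        "api_key": "<DEEPSEEK_API_KEY>",   # placeholder credential
        "model_id": "deepseek-chat",       # assumed model identifier
    },
)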

    + + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param deepseek_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `deepseek`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `deepseek` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if deepseek_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'deepseek_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "deepseek_inference_id": _quote(deepseek_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_deepseek", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=( "service", @@ -1239,11 +1304,6 @@ def put_googleaistudio(

    Create a Google AI Studio inference endpoint.

    Create an inference endpoint to perform an inference task with the googleaistudio service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1314,7 +1374,9 @@ def put_googleaistudio( def put_googlevertexai( self, *, - task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"] + ], googlevertexai_inference_id: str, service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1331,11 +1393,6 @@ def put_googlevertexai(

    Create a Google Vertex AI inference endpoint.

    Create an inference endpoint to perform an inference task with the googlevertexai service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1400,12 +1457,19 @@ def put_googlevertexai( ) @_rewrite_parameters( - body_fields=("service", "service_settings", "chunking_settings"), + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), ) def put_hugging_face( self, *, - task_type: t.Union[str, t.Literal["text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"] + ], huggingface_inference_id: str, service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1414,17 +1478,21 @@ def put_hugging_face( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html

    Create a Hugging Face inference endpoint.

    -

    Create an inference endpoint to perform an inference task with the hugging_face service.

    -

    You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. - Select the model you want to use on the new endpoint creation page (for example intfloat/e5-small-v2), then select the sentence embeddings task under the advanced configuration section. - Create the endpoint and copy the URL after the endpoint initialization has been finished.

    -

    The following models are recommended for the Hugging Face service:

    +

    Create an inference endpoint to perform an inference task with the hugging_face service. + Supported tasks include: text_embedding, completion, and chat_completion.

    +

    To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint. + Select a model that supports the task you intend to use.

    +

    For Elastic's text_embedding task: + The selected model must support the Sentence Embeddings task. On the new endpoint creation page, select the Sentence Embeddings task under the Advanced Configuration section. + After the endpoint has initialized, copy the generated endpoint URL. + Recommended models for the text_embedding task:

    • all-MiniLM-L6-v2
    • all-MiniLM-L12-v2
    • @@ -1434,11 +1502,24 @@ def put_hugging_face(
    • multilingual-e5-base
    • multilingual-e5-small
    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    +

For Elastic's chat_completion and completion tasks: + The selected model must support the Text Generation task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for Text Generation. When creating a dedicated endpoint, select the Text Generation task. + After the endpoint is initialized (for dedicated) or ready (for serverless), ensure that it supports the OpenAI API and that its URL includes the /v1/chat/completions path. Then copy the full endpoint URL for use. + Recommended models for the chat_completion and completion tasks:

    +
      +
• Mistral-7B-Instruct-v0.2
• QwQ-32B
• Phi-3-mini-128k-instruct
    +

For Elastic's rerank task: + The selected model must support the sentence-ranking task and expose the OpenAI API. + Hugging Face currently supports only dedicated (not serverless) endpoints for the rerank task. + After the endpoint is initialized, copy the full endpoint URL for use. + Tested models for the rerank task:

    +
      +
• bge-reranker-base
• jina-reranker-v1-turbo-en-GGUF
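Pulling the steps above together, a minimal sketch of registering one of these endpoints (here a chat_completion endpoint) using the put_hugging_face signature shown in this change; the inference ID, endpoint URL, and API key are placeholders, and the exact service_settings keys are an assumption:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.inference.put_hugging_face(
        task_type="chat_completion",
        huggingface_inference_id="hf-chat",   # placeholder inference ID
        service="hugging_face",
        service_settings={
            # Assumed keys: the URL copied from the Hugging Face endpoint page
            # (including /v1/chat/completions) and a Hugging Face access token.
            "url": "https://my-endpoint.endpoints.huggingface.cloud/v1/chat/completions",
            "api_key": "<hf-access-token>",
        },
    )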
    ``_ @@ -1450,6 +1531,8 @@ def put_hugging_face( :param service_settings: Settings used to install the inference model. These settings are specific to the `hugging_face` service. :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. """ if task_type in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'task_type'") @@ -1483,6 +1566,8 @@ def put_hugging_face( __body["service_settings"] = service_settings if chunking_settings is not None: __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings if not __body: __body = None # type: ignore[assignment] __headers = {"accept": "application/json"} @@ -1528,11 +1613,6 @@ def put_jinaai(

    Create an inference endpoint to perform an inference task with the jinaai service.

To review the available rerank models, refer to https://jina.ai/reranker. To review the available text_embedding models, refer to https://jina.ai/embeddings/.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1600,7 +1680,9 @@ def put_jinaai( def put_mistral( self, *, - task_type: t.Union[str, t.Literal["text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "text_embedding"] + ], mistral_inference_id: str, service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1615,18 +1697,12 @@ def put_mistral( .. raw:: html

    Create a Mistral inference endpoint.

    -

    Creates an inference endpoint to perform an inference task with the mistral service.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    +

    Create an inference endpoint to perform an inference task with the mistral service.

    ``_ - :param task_type: The task type. The only valid task type for the model to perform - is `text_embedding`. + :param task_type: The type of the inference task that the model will perform. :param mistral_inference_id: The unique identifier of the inference endpoint. :param service: The type of service supported for the specified task type. In this case, `mistral`. @@ -1709,11 +1785,6 @@ def put_openai(

    Create an OpenAI inference endpoint.

    Create an inference endpoint to perform an inference task with the openai service or openai compatible APIs.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ @@ -1873,7 +1944,9 @@ def put_voyageai( def put_watsonx( self, *, - task_type: t.Union[str, t.Literal["text_embedding"]], + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "text_embedding"] + ], watsonx_inference_id: str, service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, @@ -1890,17 +1963,11 @@ def put_watsonx(

    Create an inference endpoint to perform an inference task with the watsonxai service. You need an IBM Cloud Databases for Elasticsearch deployment to use the watsonxai inference service. You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.

    -

    When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. - After creating the endpoint, wait for the model deployment to complete before using it. - To verify the deployment status, use the get trained model statistics API. - Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". - Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

    ``_ - :param task_type: The task type. The only valid task type for the model to perform - is `text_embedding`. + :param task_type: The type of the inference task that the model will perform. :param watsonx_inference_id: The unique identifier of the inference endpoint. :param service: The type of service supported for the specified task type. In this case, `watsonxai`. diff --git a/elasticsearch/_sync/client/ml.py b/elasticsearch/_sync/client/ml.py index f2333c24a..690197642 100644 --- a/elasticsearch/_sync/client/ml.py +++ b/elasticsearch/_sync/client/ml.py @@ -1676,7 +1676,7 @@ def get_data_frame_analytics_stats( """ .. raw:: html -

    Get data frame analytics jobs usage info.

    +

    Get data frame analytics job stats.

    ``_ @@ -1744,7 +1744,7 @@ def get_datafeed_stats( """ .. raw:: html -

    Get datafeeds usage info. +

    Get datafeed stats. You can get statistics for multiple datafeeds in a single API request by using a comma-separated list of datafeeds or a wildcard expression. You can get statistics for all datafeeds by using _all, by specifying * as the @@ -2033,7 +2033,7 @@ def get_job_stats( """ .. raw:: html -

    Get anomaly detection jobs usage info.

    +

    Get anomaly detection job stats.

    ``_ @@ -3871,13 +3871,7 @@ def put_job( :param description: A description of the job. :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard - expressions match hidden data streams. Supports comma-separated values. Valid - values are: * `all`: Match any data stream or index, including hidden ones. - * `closed`: Match closed, non-hidden indices. Also matches any non-hidden - data stream. Data streams cannot be closed. * `hidden`: Match hidden data - streams and hidden indices. Must be combined with `open`, `closed`, or both. - * `none`: Wildcard patterns are not accepted. * `open`: Match open, non-hidden - indices. Also matches any non-hidden data stream. + expressions match hidden data streams. Supports comma-separated values. :param groups: A list of job groups. A job can belong to no groups or many. :param ignore_throttled: If `true`, concrete, expanded or aliased indices are ignored when frozen. @@ -5140,13 +5134,7 @@ def update_datafeed( check runs only on real-time datafeeds. :param expand_wildcards: Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard - expressions match hidden data streams. Supports comma-separated values. Valid - values are: * `all`: Match any data stream or index, including hidden ones. - * `closed`: Match closed, non-hidden indices. Also matches any non-hidden - data stream. Data streams cannot be closed. * `hidden`: Match hidden data - streams and hidden indices. Must be combined with `open`, `closed`, or both. - * `none`: Wildcard patterns are not accepted. * `open`: Match open, non-hidden - indices. Also matches any non-hidden data stream. + expressions match hidden data streams. Supports comma-separated values. :param frequency: The interval at which scheduled queries are made while the datafeed runs in real time. The default value is either the bucket span for short bucket spans, or, for longer bucket spans, a sensible fraction of the diff --git a/elasticsearch/_sync/client/rollup.py b/elasticsearch/_sync/client/rollup.py index 5e34d954f..8f098e2ff 100644 --- a/elasticsearch/_sync/client/rollup.py +++ b/elasticsearch/_sync/client/rollup.py @@ -419,28 +419,7 @@ def rollup_search( The following functionality is not available:

    size: Because rollups work on pre-aggregated data, no search hits can be returned and so size must be set to zero or omitted entirely. highlighter, suggestors, post_filter, profile, explain: These are similarly disallowed.

    -

    Searching both historical rollup and non-rollup data

    -

    The rollup search API has the capability to search across both "live" non-rollup data and the aggregated rollup data. - This is done by simply adding the live indices to the URI. For example:

    -
    GET sensor-1,sensor_rollup/_rollup_search
    -          {
    -            "size": 0,
    -            "aggregations": {
    -               "max_temperature": {
    -                "max": {
    -                  "field": "temperature"
    -                }
    -              }
    -            }
    -          }
    -          
    -

    The rollup search endpoint does two things when the search runs:

    -
      -
    • The original request is sent to the non-rollup index unaltered.
    • -
    • A rewritten version of the original request is sent to the rollup index.
    • -
    -

    When the two responses are received, the endpoint rewrites the rollup response and merges the two together. - During the merging process, if there is any overlap in buckets between the two responses, the buckets from the non-rollup index are used.

    +

    For more detailed examples of using the rollup search API, including querying rolled-up data only or combining rolled-up and live data, refer to the External documentation.
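A minimal sketch of the combined rollup-and-live search described above, using the Python client; the index and field names follow the sensor example and are placeholders:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.rollup.rollup_search(
        index=["sensor-1", "sensor_rollup"],   # live index plus rollup index
        size=0,
        aggregations={"max_temperature": {"max": {"field": "temperature"}}},
    )
    print(resp["aggregations"]["max_temperature"]["value"])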

    ``_ diff --git a/elasticsearch/_sync/client/security.py b/elasticsearch/_sync/client/security.py index 5aac0202f..e935053e3 100644 --- a/elasticsearch/_sync/client/security.py +++ b/elasticsearch/_sync/client/security.py @@ -2455,6 +2455,7 @@ def has_privileges( "manage_data_frame_transforms", "manage_data_stream_global_retention", "manage_enrich", + "manage_esql", "manage_ilm", "manage_index_templates", "manage_inference", @@ -2480,6 +2481,7 @@ def has_privileges( "monitor_data_frame_transforms", "monitor_data_stream_global_retention", "monitor_enrich", + "monitor_esql", "monitor_inference", "monitor_ml", "monitor_rollup", @@ -3126,6 +3128,7 @@ def put_role( "manage_data_frame_transforms", "manage_data_stream_global_retention", "manage_enrich", + "manage_esql", "manage_ilm", "manage_index_templates", "manage_inference", @@ -3151,6 +3154,7 @@ def put_role( "monitor_data_frame_transforms", "monitor_data_stream_global_retention", "monitor_enrich", + "monitor_esql", "monitor_inference", "monitor_ml", "monitor_rollup", @@ -3553,7 +3557,8 @@ def query_api_keys( You can optionally filter the results with a query.

    To use this API, you must have at least the manage_own_api_key or the read_security cluster privileges. If you have only the manage_own_api_key privilege, this API returns only the API keys that you own. - If you have the read_security, manage_api_key, or greater privileges (including manage_security), this API returns all API keys regardless of ownership.

    + If you have the read_security, manage_api_key, or greater privileges (including manage_security), this API returns all API keys regardless of ownership. + Refer to the linked documentation for examples of how to find API keys:
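A minimal sketch of filtering API keys by name with the Python client, assuming the caller has sufficient privileges; the key name is a placeholder:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.security.query_api_keys(
        query={"term": {"name": "my-app-key"}},   # placeholder key name
        size=10,
    )
    for api_key in resp["api_keys"]:
        print(api_key["id"], api_key["name"], api_key["invalidated"])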

    ``_ @@ -4466,6 +4471,7 @@ def update_cross_cluster_api_key(

    This API supports updates to an API key's access scope, metadata, and expiration. The owner user's information, such as the username and realm, is also updated automatically on every call.

    NOTE: This API cannot update REST API keys, which should be updated by either the update API key or bulk update API keys API.

    +

To learn more about how to use this API, refer to the Update cross cluster API key API examples page.

    ``_ diff --git a/elasticsearch/_sync/client/snapshot.py b/elasticsearch/_sync/client/snapshot.py index ae80bb2a7..55d873aa7 100644 --- a/elasticsearch/_sync/client/snapshot.py +++ b/elasticsearch/_sync/client/snapshot.py @@ -544,6 +544,28 @@ def get( ], ] ] = None, + state: t.Optional[ + t.Union[ + t.Sequence[ + t.Union[ + str, + t.Literal[ + "FAILED", + "INCOMPATIBLE", + "IN_PROGRESS", + "PARTIAL", + "SUCCESS", + ], + ] + ], + t.Union[ + str, + t.Literal[ + "FAILED", "INCOMPATIBLE", "IN_PROGRESS", "PARTIAL", "SUCCESS" + ], + ], + ] + ] = None, verbose: t.Optional[bool] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -602,6 +624,8 @@ def get( all snapshots without an SLM policy. :param sort: The sort order for the result. The default behavior is sorting by snapshot start time stamp. + :param state: Only return snapshots with a state found in the given comma-separated + list of snapshot states. The default is all snapshot states. :param verbose: If `true`, returns additional information about each snapshot such as the version of Elasticsearch which took the snapshot, the start and end times of the snapshot, and the number of shards snapshotted. NOTE: The @@ -651,6 +675,8 @@ def get( __query["slm_policy_filter"] = slm_policy_filter if sort is not None: __query["sort"] = sort + if state is not None: + __query["state"] = state if verbose is not None: __query["verbose"] = verbose __headers = {"accept": "application/json"} @@ -749,20 +775,28 @@ def repository_analyze( """ .. raw:: html -

    Analyze a snapshot repository. - Analyze the performance characteristics and any incorrect behaviour found in a repository.

    -

    The response exposes implementation details of the analysis which may change from version to version. - The response body format is therefore not considered stable and may be different in newer versions.

    +

    Analyze a snapshot repository.

    +

    Performs operations on a snapshot repository in order to check for incorrect behaviour.

    There are a large number of third-party storage systems available, not all of which are suitable for use as a snapshot repository by Elasticsearch. - Some storage systems behave incorrectly, or perform poorly, especially when accessed concurrently by multiple clients as the nodes of an Elasticsearch cluster do. This API performs a collection of read and write operations on your repository which are designed to detect incorrect behaviour and to measure the performance characteristics of your storage system.

    + Some storage systems behave incorrectly, or perform poorly, especially when accessed concurrently by multiple clients as the nodes of an Elasticsearch cluster do. + This API performs a collection of read and write operations on your repository which are designed to detect incorrect behaviour and to measure the performance characteristics of your storage system.

    The default values for the parameters are deliberately low to reduce the impact of running an analysis inadvertently and to provide a sensible starting point for your investigations. Run your first analysis with the default parameter values to check for simple problems. - If successful, run a sequence of increasingly large analyses until you encounter a failure or you reach a blob_count of at least 2000, a max_blob_size of at least 2gb, a max_total_data_size of at least 1tb, and a register_operation_count of at least 100. + Some repositories may behave correctly when lightly loaded but incorrectly under production-like workloads. + If the first analysis is successful, run a sequence of increasingly large analyses until you encounter a failure or you reach a blob_count of at least 2000, a max_blob_size of at least 2gb, a max_total_data_size of at least 1tb, and a register_operation_count of at least 100. Always specify a generous timeout, possibly 1h or longer, to allow time for each analysis to run to completion. + Some repositories may behave correctly when accessed by a small number of Elasticsearch nodes but incorrectly when accessed concurrently by a production-scale cluster. Perform the analyses using a multi-node cluster of a similar size to your production cluster so that it can detect any problems that only arise when the repository is accessed by many nodes at once.
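A minimal sketch of that escalation with the Python client; the repository name is a placeholder, and the parameter names are assumptions based on the settings named above:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    # First analysis with the default parameter values to check for simple problems.
    client.snapshot.repository_analyze(name="my_repository", timeout="1h")

    # A later, larger analysis aimed at the recommended thresholds.
    client.snapshot.repository_analyze(
        name="my_repository",
        blob_count=2000,
        max_blob_size="2gb",
        max_total_data_size="1tb",
        register_operation_count=100,
        timeout="1h",
    )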

    If the analysis fails, Elasticsearch detected that your repository behaved unexpectedly. This usually means you are using a third-party storage system with an incorrect or incompatible implementation of the API it claims to support. If so, this storage system is not suitable for use as a snapshot repository. + Repository analysis triggers conditions that occur only rarely when taking snapshots in a production system. + Snapshotting to unsuitable storage may appear to work correctly most of the time despite repository analysis failures. + However your snapshot data is at risk if you store it in a snapshot repository that does not reliably pass repository analysis. + You can demonstrate that the analysis failure is due to an incompatible storage implementation by verifying that Elasticsearch does not detect the same problem when analysing the reference implementation of the storage protocol you are using. + For instance, if you are using storage that offers an API which the supplier claims to be compatible with AWS S3, verify that repositories in AWS S3 do not fail repository analysis. + This allows you to demonstrate to your storage supplier that a repository analysis failure must only be caused by an incompatibility with AWS S3 and cannot be attributed to a problem in Elasticsearch. + Please do not report Elasticsearch issues involving third-party storage systems unless you can demonstrate that the same issue exists when analysing a repository that uses the reference implementation of the same storage protocol. You will need to work with the supplier of your storage system to address the incompatibilities that Elasticsearch detects.

    If the analysis is successful, the API returns details of the testing process, optionally including how long each operation took. You can use this information to determine the performance of your storage system. @@ -790,14 +824,17 @@ def repository_analyze( This consumes bandwidth on the network between the cluster and the repository, and storage space and I/O bandwidth on the repository itself. You must ensure this load does not affect other users of these systems. Analyses respect the repository settings max_snapshot_bytes_per_sec and max_restore_bytes_per_sec if available and the cluster setting indices.recovery.max_bytes_per_sec which you can use to limit the bandwidth they consume.

    -

    NOTE: This API is intended for exploratory use by humans. You should expect the request parameters and the response format to vary in future versions.

    +

NOTE: This API is intended for exploratory use by humans. + You should expect the request parameters and the response format to vary in future versions. + The response exposes implementation details of the analysis which may change from version to version.

    NOTE: Different versions of Elasticsearch may perform different checks for repository compatibility, with newer versions typically being stricter than older ones. A storage system that passes repository analysis with one version of Elasticsearch may fail with a different version. This indicates it behaves incorrectly in ways that the former version did not detect. You must work with the supplier of your storage system to address the incompatibilities detected by the repository analysis API in any version of Elasticsearch.

    NOTE: This API may not work correctly in a mixed-version cluster.

    Implementation details

    -

    NOTE: This section of documentation describes how the repository analysis API works in this version of Elasticsearch, but you should expect the implementation to vary between versions. The request parameters and response format depend on details of the implementation so may also be different in newer versions.

    +

    NOTE: This section of documentation describes how the repository analysis API works in this version of Elasticsearch, but you should expect the implementation to vary between versions. + The request parameters and response format depend on details of the implementation so may also be different in newer versions.

    The analysis comprises a number of blob-level tasks, as set by the blob_count parameter and a number of compare-and-exchange operations on linearizable registers, as set by the register_operation_count parameter. These tasks are distributed over the data and master-eligible nodes in the cluster for execution.

    For most blob-level tasks, the executing node first writes a blob to the repository and then instructs some of the other nodes in the cluster to attempt to read the data it just wrote. @@ -1223,6 +1260,11 @@ def status(

    If you omit the <snapshot> request path parameter, the request retrieves information only for currently running snapshots. This usage is preferred. If needed, you can specify <repository> and <snapshot> to retrieve information for specific snapshots, even if they're not currently running.

    +

    Note that the stats will not be available for any shard snapshots in an ongoing snapshot completed by a node that (even momentarily) left the cluster. + Loading the stats from the repository is an expensive operation (see the WARNING below). + Therefore the stats values for such shards will be -1 even though the "stage" value will be "DONE", in order to minimize latency. + A "description" field will be present for a shard snapshot completed by a departed node explaining why the shard snapshot's stats results are invalid. + Consequently, the total stats for the index will be less than expected due to the missing values from these shards.

    WARNING: Using the API to return the status of any snapshots other than currently running snapshots can be expensive. The API requires a read from the repository for each shard in each snapshot. For example, if you have 100 snapshots with 1,000 shards each, an API request that includes all snapshots will require 100,000 reads (100 snapshots x 1,000 shards).
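A minimal sketch of the preferred (cheap) usage with the Python client: omit the repository and snapshot parameters so only currently running snapshots are reported:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.snapshot.status()
    for snapshot in resp["snapshots"]:
        print(snapshot["snapshot"], snapshot["repository"], snapshot["state"])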

    diff --git a/elasticsearch/_sync/client/synonyms.py b/elasticsearch/_sync/client/synonyms.py index 1c9613196..8731f40fd 100644 --- a/elasticsearch/_sync/client/synonyms.py +++ b/elasticsearch/_sync/client/synonyms.py @@ -90,6 +90,7 @@ def delete_synonym_rule( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + refresh: t.Optional[bool] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html @@ -102,6 +103,9 @@ def delete_synonym_rule( :param set_id: The ID of the synonym set to update. :param rule_id: The ID of the synonym rule to delete. + :param refresh: If `true`, the request will refresh the analyzers with the deleted + synonym rule and wait for the new synonyms to be available before returning. + If `false`, analyzers will not be reloaded with the deleted synonym rule """ if set_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'set_id'") @@ -121,6 +125,8 @@ def delete_synonym_rule( __query["human"] = human if pretty is not None: __query["pretty"] = pretty + if refresh is not None: + __query["refresh"] = refresh __headers = {"accept": "application/json"} return self.perform_request( # type: ignore[return-value] "DELETE", @@ -299,6 +305,7 @@ def put_synonym( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + refresh: t.Optional[bool] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -309,12 +316,16 @@ def put_synonym( If you need to manage more synonym rules, you can create multiple synonym sets.

    When an existing synonyms set is updated, the search analyzers that use the synonyms set are reloaded automatically for all indices. This is equivalent to invoking the reload search analyzers API for all indices that use the synonyms set.

    +

    For practical examples of how to create or update a synonyms set, refer to the External documentation.
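A minimal sketch of creating a synonyms set with the refresh parameter added in this change; the set ID and rules are placeholders and the rule format is an assumption:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.synonyms.put_synonym(
        id="my-synonyms-set",   # placeholder set ID
        synonyms_set=[
            {"id": "rule-1", "synonyms": "hello, hi, howdy"},
        ],
        refresh=True,   # wait for the reloaded analyzers before returning
    )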

    ``_ :param id: The ID of the synonyms set to be created or updated. :param synonyms_set: The synonym rules definitions for the synonyms set. + :param refresh: If `true`, the request will refresh the analyzers with the new + synonyms set and wait for the new synonyms to be available before returning. + If `false`, analyzers will not be reloaded with the new synonym set """ if id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'id'") @@ -332,6 +343,8 @@ def put_synonym( __query["human"] = human if pretty is not None: __query["pretty"] = pretty + if refresh is not None: + __query["refresh"] = refresh if not __body: if synonyms_set is not None: __body["synonyms_set"] = synonyms_set @@ -359,6 +372,7 @@ def put_synonym_rule( filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + refresh: t.Optional[bool] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -376,6 +390,9 @@ def put_synonym_rule( :param rule_id: The ID of the synonym rule to be updated or created. :param synonyms: The synonym rule information definition, which must be in Solr format. + :param refresh: If `true`, the request will refresh the analyzers with the new + synonym rule and wait for the new synonyms to be available before returning. + If `false`, analyzers will not be reloaded with the new synonym rule """ if set_id in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'set_id'") @@ -398,6 +415,8 @@ def put_synonym_rule( __query["human"] = human if pretty is not None: __query["pretty"] = pretty + if refresh is not None: + __query["refresh"] = refresh if not __body: if synonyms is not None: __body["synonyms"] = synonyms diff --git a/elasticsearch/_sync/client/tasks.py b/elasticsearch/_sync/client/tasks.py index e341b371c..d9fc0b385 100644 --- a/elasticsearch/_sync/client/tasks.py +++ b/elasticsearch/_sync/client/tasks.py @@ -36,7 +36,7 @@ class TasksClient(NamespacedClient): def cancel( self, *, - task_id: t.Optional[t.Union[int, str]] = None, + task_id: t.Optional[str] = None, actions: t.Optional[t.Union[str, t.Sequence[str]]] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, diff --git a/elasticsearch/_sync/client/watcher.py b/elasticsearch/_sync/client/watcher.py index 92c70da27..9839cb80b 100644 --- a/elasticsearch/_sync/client/watcher.py +++ b/elasticsearch/_sync/client/watcher.py @@ -45,7 +45,8 @@ def ack_watch(

IMPORTANT: If the specified watch is currently being executed, this API will return an error. The reason for this behavior is to prevent overwriting the watch status from a watch execution.

    Acknowledging an action throttles further executions of that action until its ack.state is reset to awaits_successful_execution. - This happens when the condition of the watch is not met (the condition evaluates to false).

    + This happens when the condition of the watch is not met (the condition evaluates to false). + To demonstrate how throttling works in practice and how it can be configured for individual actions within a watch, refer to External documentation.
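A minimal sketch of acknowledging a single action with the Python client; the watch and action IDs are placeholders, and the status layout read back at the end is an assumption:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.watcher.ack_watch(watch_id="my_watch", action_id="notify_admin")

    # The acknowledgement state can be read back from the watch status.
    status = client.watcher.get_watch(id="my_watch")["status"]
    print(status["actions"]["notify_admin"]["ack"]["state"])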

``_ @@ -274,7 +275,8 @@ def execute_watch( This serves as a great tool for testing and debugging your watches prior to adding them to Watcher.

    When Elasticsearch security features are enabled on your cluster, watches are run with the privileges of the user that stored the watches. If your user is allowed to read index a, but not index b, then the exact same set of rules will apply during execution of a watch.

    -

    When using the run watch API, the authorization data of the user that called the API will be used as a base, instead of the information who stored the watch.

    +

+ When using the run watch API, the authorization data of the user that called the API will be used as a base, instead of the information of the user who stored the watch. + Refer to the external documentation for examples of watch execution requests, including existing, customized, and inline watches.
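A minimal sketch of running an existing watch with the Python client; the watch ID is a placeholder, and ignoring the condition here simply forces the actions to run for testing:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.watcher.execute_watch(
        id="my_watch",            # placeholder watch ID
        ignore_condition=True,    # run the actions even if the condition is false
        record_execution=True,    # persist the watch record and update the ack state
    )
    print(resp["watch_record"]["state"])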

    ``_ diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py index 73108bf3f..c33261458 100644 --- a/elasticsearch/dsl/field.py +++ b/elasticsearch/dsl/field.py @@ -4081,6 +4081,9 @@ def __init__( class SparseVector(Field): """ :arg store: + :arg index_options: Additional index options for the sparse vector + field that controls the token pruning behavior of the sparse + vector field. :arg meta: Metadata about the field. :arg properties: :arg ignore_above: @@ -4099,6 +4102,9 @@ def __init__( self, *args: Any, store: Union[bool, "DefaultType"] = DEFAULT, + index_options: Union[ + "types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType" + ] = DEFAULT, meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT, ignore_above: Union[int, "DefaultType"] = DEFAULT, @@ -4113,6 +4119,8 @@ def __init__( ): if store is not DEFAULT: kwargs["store"] = store + if index_options is not DEFAULT: + kwargs["index_options"] = index_options if meta is not DEFAULT: kwargs["meta"] = meta if properties is not DEFAULT: diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 7aaf52da6..2da646e55 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -3723,6 +3723,38 @@ def __init__( super().__init__(kwargs) +class SparseVectorIndexOptions(AttrDict[Any]): + """ + :arg prune: Whether to perform pruning, omitting the non-significant + tokens from the query to improve query performance. If prune is + true but the pruning_config is not specified, pruning will occur + but default values will be used. Default: false + :arg pruning_config: Optional pruning configuration. If enabled, this + will omit non-significant tokens from the query in order to + improve query performance. This is only used if prune is set to + true. If prune is set to true but pruning_config is not specified, + default values will be used. + """ + + prune: Union[bool, DefaultType] + pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + prune: Union[bool, DefaultType] = DEFAULT, + pruning_config: Union[ + "TokenPruningConfig", Dict[str, Any], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if prune is not DEFAULT: + kwargs["prune"] = prune + if pruning_config is not DEFAULT: + kwargs["pruning_config"] = pruning_config + super().__init__(kwargs) + + class SuggestContext(AttrDict[Any]): """ :arg name: (required)