diff --git a/etc/gen_atlas_search/.gitignore b/etc/gen_atlas_search/.gitignore new file mode 100644 index 000000000..eb5a316cb --- /dev/null +++ b/etc/gen_atlas_search/.gitignore @@ -0,0 +1 @@ +target diff --git a/etc/gen_atlas_search/Cargo.lock b/etc/gen_atlas_search/Cargo.lock new file mode 100644 index 000000000..f3356e5cd --- /dev/null +++ b/etc/gen_atlas_search/Cargo.lock @@ -0,0 +1,149 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "convert_case" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "gen_atlas_search" +version = "0.1.0" +dependencies = [ + "convert_case", + "prettyplease", + "proc-macro2", + "quote", + "serde", + "serde_yaml", + "syn", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "prettyplease" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" +dependencies = 
[ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "syn" +version = "2.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bc3fcb250e53458e712715cf74285c1f889686520d79294a9ef3bd7aa1fc619" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = 
"unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" diff --git a/etc/gen_atlas_search/Cargo.toml b/etc/gen_atlas_search/Cargo.toml new file mode 100644 index 000000000..0f504c7db --- /dev/null +++ b/etc/gen_atlas_search/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "gen_atlas_search" +version = "0.1.0" +edition = "2024" + +[dependencies] +convert_case = "0.8.0" +prettyplease = "0.2.36" +proc-macro2 = "1.0.97" +quote = "1.0.40" +serde = { version = "1.0.219", features = ["derive"] } +serde_yaml = "0.9.34" +syn = { version = "2.0.105", features = ["full", "printing"] } diff --git a/etc/gen_atlas_search/regenerate.sh b/etc/gen_atlas_search/regenerate.sh new file mode 100755 index 000000000..c1d4ae43d --- /dev/null +++ b/etc/gen_atlas_search/regenerate.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -o errexit +set -x + +gen_path="$(dirname $0)/../../src/atlas_search/gen.rs" + +cargo run > ${gen_path} +rustfmt +nightly --unstable-features ${gen_path} \ No newline at end of file diff --git a/etc/gen_atlas_search/src/main.rs b/etc/gen_atlas_search/src/main.rs new file mode 100644 index 000000000..4a1346918 --- /dev/null +++ b/etc/gen_atlas_search/src/main.rs @@ -0,0 +1,223 @@ +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream; +use quote::format_ident; +use serde::Deserialize; +use syn::parse_quote; + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +struct Operator { + name: String, + link: String, + #[serde(rename = "type")] + #[expect(dead_code)] + type_: Vec, + #[expect(dead_code)] + encode: EncodeType, + description: String, + arguments: Vec, + tests: Vec, +} + +#[derive(Debug, 
Deserialize)] +#[serde(rename_all = "camelCase")] +enum OperatorType { + SearchOperator, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +enum EncodeType { + Object, +} + +impl Operator { + fn clear_tests(mut self) -> Self { + self.tests.clear(); + self + } + + fn gen_helper(&self) -> TokenStream { + let name_text = &self.name; + let name_ident = format_ident!("{}", name_text.to_case(Case::Pascal)); + let constr_ident = format_ident!("{}", name_text.to_case(Case::Snake)); + + let mut required_args = TokenStream::new(); + let mut init_doc = TokenStream::new(); + let mut setters = TokenStream::new(); + + for arg in &self.arguments { + let ident = format_ident!("{}", arg.name.to_case(Case::Snake)); + let rust_type = arg.rust_type(); + let type_ = rust_type.tokens(); + let arg_name = &arg.name; + let init_expr = rust_type.bson_expr(&ident); + + if arg.optional.unwrap_or(false) { + let tvars = rust_type.variables(); + setters.push(parse_quote! { + #[allow(missing_docs)] + pub fn #ident<#tvars>(mut self, #ident: #type_) -> Self { + self.stage.insert(#arg_name, #init_expr); + self + } + }); + } else { + required_args.push(parse_quote! { #ident : #type_, }); + init_doc.push(parse_quote! { #arg_name : #init_expr, }); + } + } + + let desc = &self.description; + let link = format!( + "For more details, see the [{name_text} operator reference]({}).", + self.link + ); + parse_quote! { + #[allow(missing_docs)] + pub struct #name_ident; + + impl AtlasSearch<#name_ident> { + #[doc = #desc] + #[doc = ""] + #[doc = #link] + pub fn #constr_ident(#required_args) -> Self { + AtlasSearch { + name: #name_text, + stage: doc! 
{ #init_doc }, + _t: PhantomData, + } + } + #setters + } + } + } +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +struct Argument { + name: String, + #[serde(default)] + optional: Option, + #[serde(rename = "type")] + type_: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +enum ArgumentType { + String, + Object, + SearchScore, + SearchPath, + SearchOperator, + Array, + Int, +} + +static QUERY: &str = "query"; +static TOKEN_ORDER: &str = "tokenOrder"; +static MATCH_CRITERIA: &str = "matchCriteria"; + +impl Argument { + fn rust_type(&self) -> ArgumentRustType { + if self.name == QUERY { + return ArgumentRustType::StringOrArray; + } + if self.name == TOKEN_ORDER { + return ArgumentRustType::TokenOrder; + } + if self.name == MATCH_CRITERIA { + return ArgumentRustType::MatchCriteria; + } + match self.type_.as_slice() { + [ArgumentType::String] => ArgumentRustType::String, + [ArgumentType::Object] => ArgumentRustType::Document, + [ArgumentType::SearchScore] => ArgumentRustType::Document, + [ArgumentType::SearchPath] => ArgumentRustType::StringOrArray, + [ArgumentType::SearchOperator, ArgumentType::Array] => ArgumentRustType::Operator, + [ArgumentType::Int] => ArgumentRustType::I32, + _ => panic!("Unexpected argument types: {:?}", self.type_), + } + } +} + +enum ArgumentRustType { + String, + Document, + StringOrArray, + TokenOrder, + MatchCriteria, + Operator, + I32, +} + +impl ArgumentRustType { + fn tokens(&self) -> syn::Type { + match self { + Self::String => parse_quote! { impl AsRef }, + Self::Document => parse_quote! { Document }, + Self::StringOrArray => parse_quote! { impl StringOrArray }, + Self::TokenOrder => parse_quote! { TokenOrder }, + Self::MatchCriteria => parse_quote! { MatchCriteria }, + Self::Operator => parse_quote! { impl IntoIterator> }, + Self::I32 => parse_quote! { i32 }, + } + } + + fn variables(&self) -> TokenStream { + match self { + Self::Operator => parse_quote! 
{ T }, + _ => parse_quote! {}, + } + } + + fn bson_expr(&self, ident: &syn::Ident) -> syn::Expr { + match self { + Self::String => parse_quote! { #ident.as_ref() }, + Self::StringOrArray => parse_quote! { #ident.to_bson() }, + Self::TokenOrder | Self::MatchCriteria => parse_quote! { #ident.name() }, + Self::Document | Self::I32 => parse_quote! { #ident }, + Self::Operator => { + parse_quote! { #ident.into_iter().map(Document::from).collect::>() } + } + } + } +} + +// Type inference helper: TokenStream impls Extend for both TokenTree and TokenStream, so calling +// `stream.extend(parse_quote! { blah })` is ambiguous, where `stream.push(...)` is not. +trait TokenStreamExt { + fn push(&mut self, other: TokenStream); +} + +impl TokenStreamExt for TokenStream { + fn push(&mut self, other: TokenStream) { + self.extend(other); + } +} + +fn main() { + let mut operators = TokenStream::new(); + for path in [ + "yaml/search/autocomplete.yaml", + "yaml/search/text.yaml", + "yaml/search/compound.yaml", + ] { + let contents = std::fs::read_to_string(path).unwrap(); + let parsed = serde_yaml::from_str::(&contents) + .unwrap() + .clear_tests(); + operators.push(parsed.gen_helper()); + } + + let file = parse_quote! { + //! This file was autogenerated. Do not manually edit. 
+ use super::*; + + #operators + }; + let text = prettyplease::unparse(&file); + println!("{text}"); +} diff --git a/etc/gen_atlas_search/update-yaml.sh b/etc/gen_atlas_search/update-yaml.sh new file mode 100755 index 000000000..dbec840cb --- /dev/null +++ b/etc/gen_atlas_search/update-yaml.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -o errexit +set -x + +basedir=$(dirname $(realpath $0)) +tmpdir=$(mktemp -d) + +git clone --sparse --depth 1 "https://github.com/mongodb/mongo-php-library.git" "${tmpdir}" +cd "${tmpdir}" +git sparse-checkout add generator/config/search + +cd "${basedir}" +mkdir -p "yaml/search" +rsync -ah "${tmpdir}/generator/config/search/" "yaml/search" --delete + +rm -rf "${tmpdir}" \ No newline at end of file diff --git a/etc/gen_atlas_search/yaml/search/autocomplete.yaml b/etc/gen_atlas_search/yaml/search/autocomplete.yaml new file mode 100644 index 000000000..a984b9a39 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/autocomplete.yaml @@ -0,0 +1,152 @@ +# $schema: ../schema.json +name: autocomplete +link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/' +type: + - searchOperator +encode: object +description: | + The autocomplete operator performs a search for a word or phrase that + contains a sequence of characters from an incomplete input string. The + fields that you intend to query with the autocomplete operator must be + indexed with the autocomplete data type in the collection's index definition. 
+arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: tokenOrder + optional: true + type: + - string # any|sequential + - + name: fuzzy + optional: true + type: + - object + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#basic-example' + pipeline: + - + $search: + autocomplete: + query: 'off' + path: 'title' + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + + - + name: 'Fuzzy' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#fuzzy-example' + pipeline: + - + $search: + autocomplete: + query: 'pre' + path: 'title' + fuzzy: + maxEdits: 1 + prefixLength: 1 + maxExpansions: 256 + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + + - + name: 'Token Order any' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#simple-any-example' + pipeline: + - + $search: + autocomplete: + query: 'men with' + path: 'title' + tokenOrder: 'any' + - + $limit: 4 + - + $project: + _id: 0 + title: 1 + + - + name: 'Token Order sequential' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#simple-sequential-example' + pipeline: + - + $search: + autocomplete: + query: 'men with' + path: 'title' + tokenOrder: 'sequential' + - + $limit: 4 + - + $project: + _id: 0 + title: 1 + + - + name: 'Highlighting' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#highlighting-example' + pipeline: + - + $search: + autocomplete: + query: 'ger' + path: 'title' + highlight: + path: 'title' + - + $limit: 5 + - + $project: + score: + $meta: 'searchScore' + _id: 0 + title: 1 + highlights: + $meta: 'searchHighlights' + + - + name: 'Across Multiple Fields' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/#search-across-multiple-fields' + pipeline: + - + $search: + compound: + should: + - + autocomplete: + query: 'inter' + path: 'title' + - + autocomplete: 
+ query: 'inter' + path: 'plot' + minimumShouldMatch: 1 + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + plot: 1 diff --git a/etc/gen_atlas_search/yaml/search/compound.yaml b/etc/gen_atlas_search/yaml/search/compound.yaml new file mode 100644 index 000000000..7a1d9f419 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/compound.yaml @@ -0,0 +1,156 @@ +# $schema: ../schema.json +name: compound +link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/' +type: + - searchOperator +encode: object +description: | + The compound operator combines two or more operators into a single query. + Each element of a compound query is called a clause, and each clause + consists of one or more sub-queries. +arguments: + - + name: must + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: mustNot + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: should + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: filter + optional: true + type: + - searchOperator + - array # of searchOperator + - + name: minimumShouldMatch + optional: true + type: + - int + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'must and mustNot' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#must-and-mustnot-example' + pipeline: + - + $search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + mustNot: + - + text: + query: 'apples' + path: 'description' + + - + name: 'must and should' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#must-and-should-example' + pipeline: + - + $search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + should: + - + text: + query: 'Fuji' + path: 'description' + - + $project: + score: + $meta: 'searchScore' + + - + name: 'minimumShouldMatch' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#minimumshouldmatch-example' + pipeline: + - + 
$search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + should: + - + text: + query: 'Fuji' + path: 'description' + - + text: + query: 'Golden Delicious' + path: 'description' + minimumShouldMatch: 1 + + - + name: 'Filter' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#filter-examples' + pipeline: + - + $search: + compound: + must: + - + text: + query: 'varieties' + path: 'description' + should: + - + text: + query: 'banana' + path: 'description' + filter: + - + text: + query: 'granny' + path: 'description' + + - + name: 'Nested' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/compound/#nested-example' + pipeline: + - + $search: + compound: + should: + - + text: + query: 'apple' + path: 'type' + - + compound: + must: + - + text: + query: 'organic' + path: 'category' + - + equals: + value: true + path: 'in_stock' + minimumShouldMatch: 1 diff --git a/etc/gen_atlas_search/yaml/search/embeddedDocument.yaml b/etc/gen_atlas_search/yaml/search/embeddedDocument.yaml new file mode 100644 index 000000000..19c804625 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/embeddedDocument.yaml @@ -0,0 +1,155 @@ +# $schema: ../schema.json +name: embeddedDocument +link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/' +type: + - searchOperator +encode: object +description: | + The embeddedDocument operator is similar to $elemMatch operator. + It constrains multiple query predicates to be satisfied from a single + element of an array of embedded documents. 
embeddedDocument can be used only + for queries over fields of the embeddedDocuments +arguments: + - + name: path + type: + - searchPath + - + name: operator + type: + - searchOperator + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#index-definition' + pipeline: + - + $search: + embeddedDocument: + path: 'items' + operator: + compound: + must: + - + text: + path: 'items.tags' + query: 'school' + should: + - + text: + path: 'items.name' + query: 'backpack' + score: + embedded: + aggregate: 'mean' + - + $limit: 5 + - + $project: + _id: 0 + items.name: 1 + items.tags: 1 + score: + $meta: 'searchScore' + + - + name: 'Facet' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#facet-query' + pipeline: + - + $searchMeta: + facet: + operator: + embeddedDocument: + path: 'items' + operator: + compound: + must: + - + text: + path: 'items.tags' + query: 'school' + should: + - + text: + path: 'items.name' + query: 'backpack' + facets: + purchaseMethodFacet: + type: 'string' + path: 'purchaseMethod' + + - + name: 'Query and Sort' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#query-and-sort' + pipeline: + - + $search: + embeddedDocument: + path: 'items' + operator: + text: + path: 'items.name' + query: 'laptop' + sort: + items.tags: 1 + - + $limit: 5 + - + $project: + _id: 0 + items.name: 1 + items.tags: 1 + score: + $meta: 'searchScore' + + - + name: 'Query for Matching Embedded Documents Only' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/embedded-document/#query-for-matching-embedded-documents-only' + pipeline: + - + $search: + embeddedDocument: + path: 'items' + operator: + compound: + must: + - + range: + path: 'items.quantity' + gt: 2 + - + exists: + path: 'items.price' + - + text: + path: 'items.tags' + query: 'school' + - + $limit: 2 + - + $project: + _id: 0 + storeLocation: 1 + items: + 
$filter: + input: '$items' + cond: + $and: + - + $ifNull: + - '$$this.price' + - 'false' + - + $gt: + - '$$this.quantity' + - 2 + - + $in: + - 'office' + - '$$this.tags' diff --git a/etc/gen_atlas_search/yaml/search/equals.yaml b/etc/gen_atlas_search/yaml/search/equals.yaml new file mode 100644 index 000000000..b3e50c641 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/equals.yaml @@ -0,0 +1,104 @@ +# $schema: ../schema.json +name: equals +link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/' +type: + - searchOperator +encode: object +description: | + The equals operator checks whether a field matches a value you specify. +arguments: + - + name: path + type: + - searchPath + - + name: value + type: + - binData + - bool + - date + - objectId + - 'null' + - number + - string + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Boolean' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#boolean-examples' + pipeline: + - + $search: + equals: + path: 'verified_user' + value: true + - + $project: + name: 1 + _id: 0 + score: + $meta: 'searchScore' + + - + name: 'ObjectId' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#objectid-example' + pipeline: + - + $search: + equals: + path: 'teammates' + value: !bson_objectId '5a9427648b0beebeb69589a1' + + - + name: 'Date' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#date-example' + pipeline: + - + $search: + equals: + path: 'account_created' + value: !bson_utcdatetime '2022-05-04T05:01:08.000+00:00' + + - + name: 'Number' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#number-example' + pipeline: + - + $search: + equals: + path: 'employee_number' + value: 259 + + - + name: 'String' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#string-example' + pipeline: + - + $search: + equals: + path: 'name' + value: 'jim hall' + + - + name: 'UUID' + link: 
'https://www.mongodb.com/docs/atlas/atlas-search/equals/#uuid-example' + pipeline: + - + $search: + equals: + path: 'uuid' + value: !bson_uuid 'fac32260-b511-4c69-8485-a2be5b7dda9e' + + - + name: 'Null' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/equals/#null-example' + pipeline: + - + $search: + equals: + path: 'job_title' + value: ~ diff --git a/etc/gen_atlas_search/yaml/search/exists.yaml b/etc/gen_atlas_search/yaml/search/exists.yaml new file mode 100644 index 000000000..062e8ba59 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/exists.yaml @@ -0,0 +1,56 @@ +# $schema: ../schema.json +name: exists +link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/' +type: + - searchOperator +encode: object +description: | + The exists operator tests if a path to a specified indexed field name exists in a document. +arguments: + - + name: path + type: + - searchPath + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/#basic-example' + pipeline: + - + $search: + exists: + path: 'type' + + - + name: 'Embedded' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/#embedded-example' + pipeline: + - + $search: + exists: + path: 'quantities.lemons' + + - + name: 'Compound' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/exists/#compound-example' + pipeline: + - + $search: + compound: + must: + - + exists: + path: 'type' + - + text: + query: 'apple' + path: 'type' + should: + text: + query: 'fuji' + path: 'description' diff --git a/etc/gen_atlas_search/yaml/search/facet.yaml b/etc/gen_atlas_search/yaml/search/facet.yaml new file mode 100644 index 000000000..53dc8cba9 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/facet.yaml @@ -0,0 +1,56 @@ +# $schema: ../schema.json +name: facet +link: 'https://www.mongodb.com/docs/atlas/atlas-search/facet/' +type: + - searchOperator # should be searchCollector +encode: object +description: | + The 
facet collector groups results by values or ranges in the specified + faceted fields and returns the count for each of those groups. +arguments: + - + name: facets + type: + - object # map of facetDefinition + - + name: operator + optional: true + type: + - searchOperator +tests: + - + name: 'Facet' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/facet/#examples' + pipeline: + - + $search: + facet: + operator: + near: + path: 'released' + origin: !bson_utcdatetime '1999-07-01T00:00:00.000+00:00' + pivot: 7776000000 + facets: + genresFacet: + type: 'string' + path: 'genres' + - + $limit: 2 + - + $facet: + docs: + - + $project: + title: 1 + released: 1 + meta: + - + $replaceWith: '$$SEARCH_META' + - + $limit: 1 + - + $set: + meta: + $arrayElemAt: + - '$meta' + - 0 diff --git a/etc/gen_atlas_search/yaml/search/geoShape.yaml b/etc/gen_atlas_search/yaml/search/geoShape.yaml new file mode 100644 index 000000000..4da121e45 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/geoShape.yaml @@ -0,0 +1,123 @@ +# $schema: ../schema.json +name: geoShape +link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/' +type: + - searchOperator +encode: object +description: | + The geoShape operator supports querying shapes with a relation to a given + geometry if indexShapes is set to true in the index definition. 
+arguments: + - + name: path + type: + - searchPath + - + name: relation + type: + - string # contains | disjoint | intersects | within + - + name: geometry + type: + - geometry + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Disjoint' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/#disjoint-example' + pipeline: + - + $search: + geoShape: + relation: 'disjoint' + geometry: + type: 'Polygon' + coordinates: + - + - [-161.323242, 22.512557] + - [-152.446289, 22.065278] + - [-156.09375, 17.811456] + - [-161.323242, 22.512557] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' + + - + name: 'Intersect' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/#intersects-example' + pipeline: + - + $search: + geoShape: + relation: 'intersects' + geometry: + type: 'MultiPolygon' + coordinates: + - + - + - [2.16942, 41.40082] + - [2.17963, 41.40087] + - [2.18146, 41.39716] + - [2.15533, 41.40686] + - [2.14596, 41.38475] + - [2.17519, 41.41035] + - [2.16942, 41.40082] + - + - + - [2.16365, 41.39416] + - [2.16963, 41.39726] + - [2.15395, 41.38005] + - [2.17935, 41.43038] + - [2.16365, 41.39416] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' + + - + name: 'Within' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoShape/#within-example' + pipeline: + - + $search: + geoShape: + relation: 'within' + geometry: + type: 'Polygon' + coordinates: + - + - [-74.3994140625, 40.5305017757] + - [-74.7290039063, 40.5805846641] + - [-74.7729492188, 40.9467136651] + - [-74.0698242188, 41.1290213475] + - [-73.65234375, 40.9964840144] + - [-72.6416015625, 40.9467136651] + - [-72.3559570313, 40.7971774152] + - [-74.3994140625, 40.5305017757] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' diff --git 
a/etc/gen_atlas_search/yaml/search/geoWithin.yaml b/etc/gen_atlas_search/yaml/search/geoWithin.yaml new file mode 100644 index 000000000..1739f1997 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/geoWithin.yaml @@ -0,0 +1,103 @@ +# $schema: ../schema.json +name: geoWithin +link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/' +type: + - searchOperator +encode: object +description: | + The geoWithin operator supports querying geographic points within a given + geometry. Only points are returned, even if indexShapes value is true in + the index definition. +arguments: + - + name: path + type: + - searchPath + - + name: box + optional: true + type: + - object + - + name: circle + optional: true + type: + - object + - + name: geometry + optional: true + type: + - geometry + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'box' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/#box-example' + pipeline: + - + $search: + geoWithin: + path: 'address.location' + box: + bottomLeft: + type: 'Point' + coordinates: [112.467, -55.05] + topRight: + type: 'Point' + coordinates: [168, -9.133] + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + + - + name: 'circle' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/#circle-example' + pipeline: + - + $search: + geoWithin: + circle: + center: + type: 'Point' + coordinates: [-73.54, 45.54] + radius: 1600 + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + + - + name: 'geometry' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/#geometry-examples' + pipeline: + - + $search: + geoWithin: + geometry: + type: 'Polygon' + coordinates: + - + - [-161.323242, 22.512557] + - [-152.446289, 22.065278] + - [-156.09375, 17.811456] + - [-161.323242, 22.512557] + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 diff --git 
a/etc/gen_atlas_search/yaml/search/in.yaml b/etc/gen_atlas_search/yaml/search/in.yaml new file mode 100644 index 000000000..cc1aa6c33 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/in.yaml @@ -0,0 +1,89 @@ +# $schema: ../schema.json +name: in +link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/' +type: + - searchOperator +encode: object +description: | + The in operator performs a search for an array of BSON values in a field. +arguments: + - + name: path + type: + - searchPath + - + name: value + type: + - any + - array # of any + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Single Value Field Match' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/#examples' + pipeline: + - + $search: + in: + path: 'birthdate' + value: + - !bson_utcdatetime '1977-03-02T02:20:31.000+00:00' + - !bson_utcdatetime '1977-03-01T00:00:00.000+00:00' + - !bson_utcdatetime '1977-05-06T21:57:35.000+00:00' + - + $project: + _id: 0 + name: 1 + birthdate: 1 + + - + name: 'Array Value Field Match' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/#examples' + pipeline: + - + $search: + in: + path: 'accounts' + value: + - 371138 + - 371139 + - 371140 + - + $project: + _id: 0 + name: 1 + accounts: 1 + + - + name: 'Compound Query Match' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/in/#examples' + pipeline: + - + $search: + compound: + must: + - + in: + path: 'name' + value: + - 'james sanchez' + - 'jennifer lawrence' + should: + - + in: + path: '_id' + value: + - !bson_objectId '5ca4bbcea2dd94ee58162a72' + - !bson_objectId '5ca4bbcea2dd94ee58162a91' + - + $limit: 5 + - + $project: + _id: 1 + name: 1 + score: + $meta: 'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/moreLikeThis.yaml b/etc/gen_atlas_search/yaml/search/moreLikeThis.yaml new file mode 100644 index 000000000..8c4803bdd --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/moreLikeThis.yaml @@ -0,0 +1,99 @@ +# $schema: ../schema.json +name: 
moreLikeThis +link: 'https://www.mongodb.com/docs/atlas/atlas-search/moreLikeThis/' +type: + - searchOperator +encode: object +description: | + The moreLikeThis operator returns documents similar to input documents. + The moreLikeThis operator allows you to build features for your applications + that display similar or alternative results based on one or more given documents. +arguments: + - + name: like + type: + - object + - array # of object + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Single Document with Multiple Fields' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/#example-1--single-document-with-multiple-fields' + pipeline: + - + $search: + moreLikeThis: + like: + title: 'The Godfather' + genres: 'action' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + released: 1 + genres: 1 + + - + name: 'Input Document Excluded in Results' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/#example-2--input-document-excluded-in-results' + pipeline: + - + $search: + compound: + must: + - + moreLikeThis: + like: + _id: !bson_objectId '573a1396f29313caabce4a9a' + genres: + - 'Crime' + - 'Drama' + title: 'The Godfather' + mustNot: + - + equals: + path: '_id' + value: !bson_objectId '573a1396f29313caabce4a9a' + - + $limit: 5 + - + $project: + _id: 1 + title: 1 + released: 1 + genres: 1 + + - + name: 'Multiple Analyzers' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/#example-3--multiple-analyzers' + pipeline: + - + $search: + compound: + should: + - + moreLikeThis: + like: + _id: !bson_objectId '573a1396f29313caabce4a9a' + genres: + - 'Crime' + - 'Drama' + title: 'The Godfather' + mustNot: + - + equals: + path: '_id' + value: !bson_objectId '573a1394f29313caabcde9ef' + - + $limit: 10 + - + $project: + title: 1 + genres: 1 + _id: 1 diff --git a/etc/gen_atlas_search/yaml/search/near.yaml b/etc/gen_atlas_search/yaml/search/near.yaml new file mode 100644 index 
000000000..bd4119cf9 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/near.yaml @@ -0,0 +1,124 @@ +# $schema: ../schema.json +name: near +link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/' +type: + - searchOperator +encode: object +description: | + The near operator supports querying and scoring numeric, date, and GeoJSON point values. +arguments: + - + name: path + type: + - searchPath + - + name: origin + type: + - date + - number + - geometry + - + name: pivot + type: + - number + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Number' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#number-example' + pipeline: + - + $search: + index: 'runtimes' + near: + path: 'runtime' + origin: 279 + pivot: 2 + - + $limit: 7 + - + $project: + _id: 0 + title: 1 + runtime: 1 + score: + $meta: 'searchScore' + + - + name: 'Date' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#date-example' + pipeline: + - + $search: + index: 'releaseddate' + near: + path: 'released' + origin: !bson_utcdatetime '1915-09-13T00:00:00.000+00:00' + pivot: 7776000000 + - + $limit: 3 + - + $project: + _id: 0 + title: 1 + released: 1 + score: + $meta: 'searchScore' + + - + name: 'GeoJSON Point' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#geojson-point-examples' + pipeline: + - + $search: + near: + origin: + type: 'Point' + coordinates: + - -8.61308 + - 41.1413 + pivot: 1000 + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + name: 1 + address: 1 + score: + $meta: 'searchScore' + + - + name: 'Compound' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/near/#compound-example' + pipeline: + - + $search: + compound: + must: + text: + query: 'Apartment' + path: 'property_type' + should: + near: + origin: + type: 'Point' + coordinates: + - 114.15027 + - 22.28158 + pivot: 1000 + path: 'address.location' + - + $limit: 3 + - + $project: + _id: 0 + property_type: 1 + address: 1 + score: + $meta: 
'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/phrase.yaml b/etc/gen_atlas_search/yaml/search/phrase.yaml new file mode 100644 index 000000000..4d9b75c4e --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/phrase.yaml @@ -0,0 +1,109 @@ +# $schema: ../schema.json +name: phrase +link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/' +type: + - searchOperator +encode: object +description: | + The phrase operator performs search for documents containing an ordered sequence of terms using the analyzer specified in the index configuration. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - array # of string + - + name: slop + optional: true + type: + - int + - + name: synonyms + optional: true + type: + - string + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Single Phrase' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#single-phrase-example' + pipeline: + - + $search: + phrase: + path: 'title' + query: 'new york' + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Multiple Phrase' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#multiple-phrases-example' + pipeline: + - + $search: + phrase: + path: 'title' + query: + - 'the man' + - 'the moon' + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Phrase Slop' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#slop-example' + pipeline: + - + $search: + phrase: + path: 'title' + query: 'men women' + slop: 5 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Phrase Synonyms' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/phrase/#synonyms-example' + pipeline: + - + $search: + phrase: + path: 'plot' + query: 'automobile race' + slop: 5 + synonyms: 'my_synonyms' + - + $limit: 5 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 
'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/queryString.yaml b/etc/gen_atlas_search/yaml/search/queryString.yaml new file mode 100644 index 000000000..8202771c9 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/queryString.yaml @@ -0,0 +1,35 @@ +# $schema: ../schema.json +name: queryString +link: 'https://www.mongodb.com/docs/atlas/atlas-search/queryString/' +type: + - searchOperator +encode: object +description: | + +arguments: + - + name: defaultPath + type: + - searchPath + - + name: query + type: + - string + +# The various examples in the docs are variations of the "query" parameter; +# they are not pertinent for testing the aggregation builder, unless we create +# a queryString builder. +tests: + - + name: 'Boolean Operator Queries' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/queryString/#boolean-operator-queries' + pipeline: + - + $search: + queryString: + defaultPath: 'title' + query: 'Rocky AND (IV OR 4 OR Four)' + - + $project: + _id: 0 + title: 1 diff --git a/etc/gen_atlas_search/yaml/search/range.yaml b/etc/gen_atlas_search/yaml/search/range.yaml new file mode 100644 index 000000000..f42c69176 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/range.yaml @@ -0,0 +1,139 @@ +# $schema: ../schema.json +name: range +link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/' +type: + - searchOperator +encode: object +description: | + The range operator supports querying and scoring numeric, date, and string values. + You can use this operator to find results that are within a given numeric, date, objectId, or letter (from the English alphabet) range. 
+arguments: + - + name: path + type: + - searchPath + - + name: gt + optional: true + type: + - date + - number + - string + - objectId + - + name: gte + optional: true + type: + - date + - number + - string + - objectId + - + name: lt + optional: true + type: + - date + - number + - string + - objectId + - + name: lte + optional: true + type: + - date + - number + - string + - objectId + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Number gte lte' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#number-example' + pipeline: + - + $search: + range: + path: 'runtime' + gte: 2 + lte: 3 + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + runtime: 1 + + - + name: 'Number lte' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#number-example' + pipeline: + - + $search: + range: + path: 'runtime' + lte: 2 + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + runtime: 1 + score: + $meta: 'searchScore' + + - + name: 'Date' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#date-example' + pipeline: + - + $search: + range: + path: 'released' + gt: !bson_utcdatetime '2010-01-01T00:00:00.000Z' + lt: !bson_utcdatetime '2015-01-01T00:00:00.000Z' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + released: 1 + + - + name: 'ObjectId' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#objectid-example' + pipeline: + - + $search: + range: + path: '_id' + gte: !bson_objectId '573a1396f29313caabce4a9a' + lte: !bson_objectId '573a1396f29313caabce4ae7' + - + $project: + _id: 1 + title: 1 + released: 1 + + - + name: 'String' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/range/#string-example' + pipeline: + - + $search: + range: + path: 'title' + gt: 'city' + lt: 'country' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 diff --git a/etc/gen_atlas_search/yaml/search/regex.yaml b/etc/gen_atlas_search/yaml/search/regex.yaml new file mode 100644 index 000000000..869ffabde --- /dev/null 
+++ b/etc/gen_atlas_search/yaml/search/regex.yaml @@ -0,0 +1,42 @@ +# $schema: ../schema.json +name: regex +link: 'https://www.mongodb.com/docs/atlas/atlas-search/regex/' +type: + - searchOperator +encode: object +description: | + regex interprets the query field as a regular expression. + regex is a term-level operator, meaning that the query field isn't analyzed. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: allowAnalyzedField + optional: true + type: + - bool + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Regex' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/regex/#examples' + pipeline: + - + $search: + regex: + path: 'title' + query: '[0-9]{2} (.){4}s' + - + $project: + _id: 0 + title: 1 diff --git a/etc/gen_atlas_search/yaml/search/text.yaml b/etc/gen_atlas_search/yaml/search/text.yaml new file mode 100644 index 000000000..dbd48cdd0 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/text.yaml @@ -0,0 +1,194 @@ +# $schema: ../schema.json +name: text +link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/' +type: + - searchOperator +encode: object +description: | + The text operator performs a full-text search using the analyzer that you specify in the index configuration. + If you omit an analyzer, the text operator uses the default standard analyzer. 
+arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: fuzzy + optional: true + type: + - object + - + name: matchCriteria + optional: true + type: + - string # "any" | "all" + - + name: synonyms + optional: true + type: + - string + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Basic' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#basic-example' + pipeline: + - + $search: + text: + path: 'title' + query: 'surfer' + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + - + name: 'Fuzzy Default' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#fuzzy-examples' + pipeline: + - + $search: + text: + path: 'title' + query: 'naw yark' + fuzzy: {} + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Fuzzy maxExpansions' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#fuzzy-examples' + pipeline: + - + $search: + text: + path: 'title' + query: 'naw yark' + fuzzy: + maxEdits: 1 + maxExpansions: 100 + - + $limit: 10 + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Fuzzy prefixLength' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#fuzzy-examples' + pipeline: + - + $search: + text: + path: 'title' + query: 'naw yark' + fuzzy: + maxEdits: 1 + prefixLength: 2 + - + $limit: 8 + - + $project: + _id: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Match any Using equivalent Mapping' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#match-any-using-equivalent-mapping' + pipeline: + - + $search: + text: + path: 'plot' + query: 'attire' + synonyms: 'my_synonyms' + matchCriteria: 'any' + - + $limit: 5 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Match any Using explicit Mapping' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#match-any-using-explicit-mapping' + pipeline: 
+ - + $search: + text: + path: 'plot' + query: 'boat race' + synonyms: 'my_synonyms' + matchCriteria: 'any' + - + $limit: 10 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Match all Using Synonyms' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/#match-all-using-synonyms' + pipeline: + - + $search: + text: + path: 'plot' + query: 'automobile race' + matchCriteria: 'all' + synonyms: 'my_synonyms' + - + $limit: 20 + - + $project: + _id: 0 + plot: 1 + title: 1 + score: + $meta: 'searchScore' + + - + name: 'Wildcard Path' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/text/' + pipeline: + - + $search: + text: + path: + wildcard: '*' + query: 'surfer' + - + $project: + _id: 0 + title: 1 + score: + $meta: 'searchScore' diff --git a/etc/gen_atlas_search/yaml/search/wildcard.yaml b/etc/gen_atlas_search/yaml/search/wildcard.yaml new file mode 100644 index 000000000..d17fb4803 --- /dev/null +++ b/etc/gen_atlas_search/yaml/search/wildcard.yaml @@ -0,0 +1,60 @@ +# $schema: ../schema.json +name: wildcard +link: 'https://www.mongodb.com/docs/atlas/atlas-search/wildcard/' +type: + - searchOperator +encode: object +description: | + The wildcard operator enables queries which use special characters in the search string that can match any character. +arguments: + - + name: path + type: + - searchPath + - + name: query + type: + - string + - + name: allowAnalyzedField + optional: true + type: + - bool + - + name: score + optional: true + type: + - searchScore +tests: + - + name: 'Wildcard Path' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/wildcard/#index-definition' + pipeline: + - + $search: + wildcard: + query: 'Wom?n *' + path: + wildcard: '*' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 + + - + name: 'Escape Character Example' + link: 'https://www.mongodb.com/docs/atlas/atlas-search/wildcard/#escape-character-example' + pipeline: + - + $search: + wildcard: + query: '*\?' 
+ path: 'title' + - + $limit: 5 + - + $project: + _id: 0 + title: 1 diff --git a/src/atlas_search.rs b/src/atlas_search.rs new file mode 100644 index 000000000..c82645190 --- /dev/null +++ b/src/atlas_search.rs @@ -0,0 +1,286 @@ +//! Helpers for building Atlas Search aggregation pipelines. + +mod gen; + +pub use gen::*; + +use std::marker::PhantomData; + +use crate::bson::{doc, Bson, Document}; + +/// A helper to build the aggregation stage for Atlas Search. Use one of the constructor functions +/// and chain optional value setters, and then convert to a pipeline stage [`Document`] via +/// [`into`](Into::into) or [`on_index`](AtlasSearch::on_index). +/// +/// ```no_run +/// # async fn wrapper() -> mongodb::error::Result<()> { +/// # use mongodb::{Collection, atlas_search::AtlasSearch, bson::{Document, doc}}; +/// # let collection: Collection = todo!(); +/// let cursor = collection.aggregate(vec![ +/// AtlasSearch::autocomplete("title", "pre") +/// .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) +/// .into(), +/// doc! { +/// "$limit": 10, +/// }, +/// doc! { +/// "$project": { +/// "_id": 0, +/// "title": 1, +/// } +/// }, +/// ]).await?; +/// # Ok(()) +/// # } +/// ``` +pub struct AtlasSearch { + name: &'static str, + stage: Document, + _t: PhantomData, +} + +impl From> for Document { + fn from(value: AtlasSearch) -> Self { + doc! { + "$search": { + value.name: value.stage + } + } + } +} + +impl AtlasSearch { + /// Erase the type of this builder. 
Not typically needed, but can be useful to include builders + /// of different types in a single `Vec`: + /// ```no_run + /// # async fn wrapper() -> mongodb::error::Result<()> { + /// # use mongodb::{Collection, atlas_search::AtlasSearch, bson::{Document, doc}}; + /// # let collection: Collection = todo!(); + /// let cursor = collection.aggregate(vec![ + /// AtlasSearch::compound() + /// .must(vec![ + /// AtlasSearch::text("description", "varieties").unit(), + /// AtlasSearch::compound() + /// .should(AtlasSearch::text("description", "Fuji")) + /// .unit(), + /// ]) + /// .into(), + /// ]).await?; + /// # } + /// ``` + pub fn unit(self) -> AtlasSearch<()> { + AtlasSearch { + name: self.name, + stage: self.stage, + _t: PhantomData, + } + } + + /// Like [`into`](Into::into), converts this builder into an aggregate pipeline stage + /// [`Document`], but also specifies the search index to use. + pub fn on_index(self, index: impl AsRef) -> Document { + doc! { + "$search": { + "index": index.as_ref(), + self.name: self.stage, + } + } + } +} + +impl IntoIterator for AtlasSearch { + type Item = AtlasSearch; + + type IntoIter = std::iter::Once>; + + fn into_iter(self) -> Self::IntoIter { + std::iter::once(self) + } +} + +/// Order in which to search for tokens. +#[derive(Debug, Clone, PartialEq)] +#[non_exhaustive] +pub enum TokenOrder { + /// Indicates tokens in the query can appear in any order in the documents. + Any, + /// Indicates tokens in the query must appear adjacent to each other or in the order specified + /// in the query in the documents. + Sequential, + /// Fallback for future compatibility. + Other(String), +} + +impl TokenOrder { + fn name(&self) -> &str { + match self { + Self::Any => "any", + Self::Sequential => "sequential", + Self::Other(s) => s.as_str(), + } + } +} + +/// Criteria to use to match the terms in the query. 
+#[derive(Debug, Clone, PartialEq)] +#[non_exhaustive] +pub enum MatchCriteria { + /// Return documents that contain any of the terms from the query field. + Any, + /// Only return documents that contain all of the terms from the query field. + All, + /// Fallback for future compatibility. + Other(String), +} + +impl MatchCriteria { + fn name(&self) -> &str { + match self { + Self::Any => "any", + Self::All => "all", + Self::Other(s) => s.as_str(), + } + } +} + +/// An Atlas Search operator parameter that can be either a string or an array of strings. +pub trait StringOrArray { + #[allow(missing_docs)] + fn to_bson(self) -> Bson; +} + +impl StringOrArray for &str { + fn to_bson(self) -> Bson { + Bson::String(self.to_owned()) + } +} + +impl StringOrArray for String { + fn to_bson(self) -> Bson { + Bson::String(self) + } +} + +impl StringOrArray for &String { + fn to_bson(self) -> Bson { + Bson::String(self.clone()) + } +} + +impl StringOrArray for &[&str] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.to_owned())).collect()) + } +} + +impl StringOrArray for &[&str; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.to_owned())).collect()) + } +} + +impl StringOrArray for &[String] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for &[String; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for [String; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.into_iter().map(Bson::String).collect()) + } +} + +impl StringOrArray for &[&String] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for &[&String; N] { + fn to_bson(self) -> Bson { + Bson::Array(self.iter().map(|&s| Bson::String(s.clone())).collect()) + } +} + +impl StringOrArray for Vec<&str> { + fn 
to_bson(self) -> Bson { + Bson::Array( + self.into_iter() + .map(|s| Bson::String(s.to_owned())) + .collect(), + ) + } +} + +impl StringOrArray for Vec { + fn to_bson(self) -> Bson { + Bson::Array(self.into_iter().map(Bson::String).collect()) + } +} + +impl StringOrArray for Vec<&String> { + fn to_bson(self) -> Bson { + Bson::Array(self.into_iter().map(|s| Bson::String(s.clone())).collect()) + } +} + +#[tokio::test] +async fn api_flow() { + // This is currently intended as a testbed for how the API works, not as an actual test. + return; + + #[allow(unreachable_code)] + { + #[allow(unused_variables)] + let coll: crate::Collection = todo!(); + let _ = coll + .aggregate(vec![ + AtlasSearch::autocomplete("title", "pre") + .fuzzy(doc! { "maxEdits": 1, "prefixLength": 1, "maxExpansions": 256 }) + .into(), + doc! { + "$limit": 10, + }, + doc! { + "$project": { + "_id": 0, + "title": 1, + } + }, + ]) + .await; + let _ = coll + .aggregate(vec![ + AtlasSearch::text("plot", "baseball").into(), + doc! { "$limit": 3 }, + doc! { + "$project": { + "_id": 0, + "title": 1, + "plot": 1, + } + }, + ]) + .await; + let _ = coll + .aggregate(vec![ + AtlasSearch::compound() + .must(AtlasSearch::text("description", "varieties")) + .should(AtlasSearch::text("description", "Fuji")) + .into(), + doc! { + "$project": { + "score": { "$meta": "searchScore" } + } + }, + ]) + .await; + } +} diff --git a/src/atlas_search/gen.rs b/src/atlas_search/gen.rs new file mode 100644 index 000000000..31111c43b --- /dev/null +++ b/src/atlas_search/gen.rs @@ -0,0 +1,135 @@ +//! This file was autogenerated. Do not manually edit. +use super::*; +#[allow(missing_docs)] +pub struct Autocomplete; +impl AtlasSearch { + /**The autocomplete operator performs a search for a word or phrase that + contains a sequence of characters from an incomplete input string. The + fields that you intend to query with the autocomplete operator must be + indexed with the autocomplete data type in the collection's index definition. 
+ */ + /// + ///For more details, see the [autocomplete operator reference](https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/). + pub fn autocomplete(path: impl StringOrArray, query: impl StringOrArray) -> Self { + AtlasSearch { + name: "autocomplete", + stage: doc! { + "path" : path.to_bson(), "query" : query.to_bson(), + }, + _t: PhantomData, + } + } + #[allow(missing_docs)] + pub fn token_order(mut self, token_order: TokenOrder) -> Self { + self.stage.insert("tokenOrder", token_order.name()); + self + } + #[allow(missing_docs)] + pub fn fuzzy(mut self, fuzzy: Document) -> Self { + self.stage.insert("fuzzy", fuzzy); + self + } + #[allow(missing_docs)] + pub fn score(mut self, score: Document) -> Self { + self.stage.insert("score", score); + self + } +} +#[allow(missing_docs)] +pub struct Text; +impl AtlasSearch { + /**The text operator performs a full-text search using the analyzer that you specify in the index configuration. + If you omit an analyzer, the text operator uses the default standard analyzer. + */ + /// + ///For more details, see the [text operator reference](https://www.mongodb.com/docs/atlas/atlas-search/text/). + pub fn text(path: impl StringOrArray, query: impl StringOrArray) -> Self { + AtlasSearch { + name: "text", + stage: doc! 
{ + "path" : path.to_bson(), "query" : query.to_bson(), + }, + _t: PhantomData, + } + } + #[allow(missing_docs)] + pub fn fuzzy(mut self, fuzzy: Document) -> Self { + self.stage.insert("fuzzy", fuzzy); + self + } + #[allow(missing_docs)] + pub fn match_criteria(mut self, match_criteria: MatchCriteria) -> Self { + self.stage.insert("matchCriteria", match_criteria.name()); + self + } + #[allow(missing_docs)] + pub fn synonyms(mut self, synonyms: impl AsRef) -> Self { + self.stage.insert("synonyms", synonyms.as_ref()); + self + } + #[allow(missing_docs)] + pub fn score(mut self, score: Document) -> Self { + self.stage.insert("score", score); + self + } +} +#[allow(missing_docs)] +pub struct Compound; +impl AtlasSearch { + /**The compound operator combines two or more operators into a single query. + Each element of a compound query is called a clause, and each clause + consists of one or more sub-queries. + */ + /// + ///For more details, see the [compound operator reference](https://www.mongodb.com/docs/atlas/atlas-search/compound/). + pub fn compound() -> Self { + AtlasSearch { + name: "compound", + stage: doc! 
{}, + _t: PhantomData, + } + } + #[allow(missing_docs)] + pub fn must(mut self, must: impl IntoIterator>) -> Self { + self.stage.insert( + "must", + must.into_iter().map(Document::from).collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn must_not(mut self, must_not: impl IntoIterator>) -> Self { + self.stage.insert( + "mustNot", + must_not.into_iter().map(Document::from).collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn should(mut self, should: impl IntoIterator>) -> Self { + self.stage.insert( + "should", + should.into_iter().map(Document::from).collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn filter(mut self, filter: impl IntoIterator>) -> Self { + self.stage.insert( + "filter", + filter.into_iter().map(Document::from).collect::>(), + ); + self + } + #[allow(missing_docs)] + pub fn minimum_should_match(mut self, minimum_should_match: i32) -> Self { + self.stage + .insert("minimumShouldMatch", minimum_should_match); + self + } + #[allow(missing_docs)] + pub fn score(mut self, score: Document) -> Self { + self.stage.insert("score", score); + self + } +} diff --git a/src/lib.rs b/src/lib.rs index 7f0ab39e3..d41b2da1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ pub mod options; pub use ::mongocrypt; pub mod action; +pub mod atlas_search; pub(crate) mod bson_compat; mod bson_util; pub mod change_stream;