Skip to content

Commit 218716f

Browse files
andrewheardpaulb777google-labs-jules[bot]gemini-code-assist[bot]
authored
[Firebase AI] Add URLContext tool (#15221)
Co-authored-by: Paul Beusterien <[email protected]> Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent a5bc774 commit 218716f

12 files changed

+427
-5
lines changed

FirebaseAI/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Unreleased
2+
- [feature] Added support for the URL context tool, which allows the model to access content
3+
from provided public web URLs to inform and enhance its responses. (#15221)
24
- [changed] Using Firebase AI Logic with the Gemini Developer API is now Generally Available (GA).
35
- [changed] Using Firebase AI Logic with the Imagen generation APIs is now Generally Available (GA).
46

FirebaseAI/Sources/AILog.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ enum AILog {
6666
case codeExecutionResultUnrecognizedOutcome = 3015
6767
case executableCodeUnrecognizedLanguage = 3016
6868
case fallbackValueUsed = 3017
69+
case urlMetadataUnrecognizedURLRetrievalStatus = 3018
6970

7071
// SDK State Errors
7172
case generateContentResponseNoCandidates = 4000

FirebaseAI/Sources/GenerateContentResponse.swift

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ public struct GenerateContentResponse: Sendable {
2626
/// The total number of tokens across the generated response candidates.
2727
public let candidatesTokenCount: Int
2828

29+
/// The number of tokens used by tools.
30+
public let toolUsePromptTokenCount: Int
31+
2932
/// The number of tokens used by the model's internal "thinking" process.
3033
///
3134
/// For models that support thinking (like Gemini 2.5 Pro and Flash), this represents the actual
@@ -39,11 +42,15 @@ public struct GenerateContentResponse: Sendable {
3942
/// The total number of tokens in both the request and response.
4043
public let totalTokenCount: Int
4144

42-
/// The breakdown, by modality, of how many tokens are consumed by the prompt
45+
/// The breakdown, by modality, of how many tokens are consumed by the prompt.
4346
public let promptTokensDetails: [ModalityTokenCount]
4447

4548
/// The breakdown, by modality, of how many tokens are consumed by the candidates.
4649
public let candidatesTokensDetails: [ModalityTokenCount]
50+
51+
/// The breakdown, by modality, of how many tokens were consumed by the tools used to process
52+
/// the request.
53+
public let toolUsePromptTokensDetails: [ModalityTokenCount]
4754
}
4855

4956
/// A list of candidate response content, ordered from best to worst.
@@ -154,14 +161,19 @@ public struct Candidate: Sendable {
154161

155162
public let groundingMetadata: GroundingMetadata?
156163

164+
/// Metadata related to the ``URLContext`` tool.
165+
public let urlContextMetadata: URLContextMetadata?
166+
157167
/// Initializer for SwiftUI previews or tests.
158168
public init(content: ModelContent, safetyRatings: [SafetyRating], finishReason: FinishReason?,
159-
citationMetadata: CitationMetadata?, groundingMetadata: GroundingMetadata? = nil) {
169+
citationMetadata: CitationMetadata?, groundingMetadata: GroundingMetadata? = nil,
170+
urlContextMetadata: URLContextMetadata? = nil) {
160171
self.content = content
161172
self.safetyRatings = safetyRatings
162173
self.finishReason = finishReason
163174
self.citationMetadata = citationMetadata
164175
self.groundingMetadata = groundingMetadata
176+
self.urlContextMetadata = urlContextMetadata
165177
}
166178

167179
// Returns `true` if the candidate contains no information that a developer could use.
@@ -469,17 +481,21 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
469481
enum CodingKeys: CodingKey {
470482
case promptTokenCount
471483
case candidatesTokenCount
484+
case toolUsePromptTokenCount
472485
case thoughtsTokenCount
473486
case totalTokenCount
474487
case promptTokensDetails
475488
case candidatesTokensDetails
489+
case toolUsePromptTokensDetails
476490
}
477491

478492
public init(from decoder: any Decoder) throws {
479493
let container = try decoder.container(keyedBy: CodingKeys.self)
480494
promptTokenCount = try container.decodeIfPresent(Int.self, forKey: .promptTokenCount) ?? 0
481495
candidatesTokenCount =
482496
try container.decodeIfPresent(Int.self, forKey: .candidatesTokenCount) ?? 0
497+
toolUsePromptTokenCount =
498+
try container.decodeIfPresent(Int.self, forKey: .toolUsePromptTokenCount) ?? 0
483499
thoughtsTokenCount = try container.decodeIfPresent(Int.self, forKey: .thoughtsTokenCount) ?? 0
484500
totalTokenCount = try container.decodeIfPresent(Int.self, forKey: .totalTokenCount) ?? 0
485501
promptTokensDetails =
@@ -488,6 +504,9 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
488504
[ModalityTokenCount].self,
489505
forKey: .candidatesTokensDetails
490506
) ?? []
507+
toolUsePromptTokensDetails = try container.decodeIfPresent(
508+
[ModalityTokenCount].self, forKey: .toolUsePromptTokensDetails
509+
) ?? []
491510
}
492511
}
493512

@@ -499,6 +518,7 @@ extension Candidate: Decodable {
499518
case finishReason
500519
case citationMetadata
501520
case groundingMetadata
521+
case urlContextMetadata
502522
}
503523

504524
/// Initializes a response from a decoder. Used for decoding server responses; not for public
@@ -540,6 +560,14 @@ extension Candidate: Decodable {
540560
GroundingMetadata.self,
541561
forKey: .groundingMetadata
542562
)
563+
564+
if let urlContextMetadata =
565+
try container.decodeIfPresent(URLContextMetadata.self, forKey: .urlContextMetadata),
566+
!urlContextMetadata.urlMetadata.isEmpty {
567+
self.urlContextMetadata = urlContextMetadata
568+
} else {
569+
urlContextMetadata = nil
570+
}
543571
}
544572
}
545573

FirebaseAI/Sources/Tool.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,15 @@ public struct Tool: Sendable {
7676
let googleSearch: GoogleSearch?
7777

7878
let codeExecution: CodeExecution?
79+
let urlContext: URLContext?
7980

8081
/// Creates a tool from its individual configurations.
///
/// Every parameter is optional and defaults to `nil`; each argument is stored unchanged in the
/// property of the same name.
init(functionDeclarations: [FunctionDeclaration]? = nil,
     googleSearch: GoogleSearch? = nil,
     urlContext: URLContext? = nil,
     codeExecution: CodeExecution? = nil) {
  self.codeExecution = codeExecution
  self.urlContext = urlContext
  self.googleSearch = googleSearch
  self.functionDeclarations = functionDeclarations
}
8790

@@ -128,6 +131,15 @@ public struct Tool: Sendable {
128131
return self.init(googleSearch: googleSearch)
129132
}
130133

134+
/// Creates a tool that lets you give the model additional context in the form of public web
/// URLs.
///
/// When URLs are included in a request, the Gemini model accesses the content of those pages to
/// inform and enhance its response.
///
/// - Returns: A `Tool` whose only populated configuration is `urlContext`.
public static func urlContext() -> Tool {
  let contextConfiguration = URLContext()
  return Tool(urlContext: contextConfiguration)
}
142+
131143
/// Creates a tool that allows the model to execute code.
132144
///
133145
/// For more details, see ``CodeExecution``.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
/// Field-less configuration value for the URL context tool.
///
/// The type declares no stored properties; `Tool.urlContext()` creates an instance and stores it
/// in the tool's `urlContext` property.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
struct URLContext: Encodable, Sendable {
  /// Creates the URL context configuration.
  init() {}
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
/// Metadata reported for the ``Tool/urlContext()`` tool.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct URLContextMetadata: Hashable, Sendable {
  /// The per-URL metadata entries for the URLs used to provide context to the Gemini model.
  public let urlMetadata: [URLMetadata]
}
21+
22+
// MARK: - Codable Conformances
23+
24+
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension URLContextMetadata: Decodable {
  enum CodingKeys: CodingKey {
    case urlMetadata
  }

  /// Decodes the metadata from `decoder`.
  ///
  /// A missing or `null` `urlMetadata` key produces an empty list rather than an error.
  ///
  /// - Throws: Any error raised while obtaining the keyed container or decoding the entries.
  public init(from decoder: any Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)
    if let entries = try values.decodeIfPresent([URLMetadata].self, forKey: .urlMetadata) {
      urlMetadata = entries
    } else {
      urlMetadata = []
    }
  }
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import Foundation
16+
17+
/// Describes a single URL retrieved by the ``Tool/urlContext()`` tool.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct URLMetadata: Hashable, Sendable {
  /// The outcome of a URL retrieval.
  public struct URLRetrievalStatus: DecodableProtoEnum, Hashable {
    /// The raw status strings that map to a named status.
    enum Kind: String {
      case unspecified = "URL_RETRIEVAL_STATUS_UNSPECIFIED"
      case success = "URL_RETRIEVAL_STATUS_SUCCESS"
      case error = "URL_RETRIEVAL_STATUS_ERROR"
      case paywall = "URL_RETRIEVAL_STATUS_PAYWALL"
      case unsafe = "URL_RETRIEVAL_STATUS_UNSAFE"
    }

    /// Internal only - Unspecified retrieval status.
    static let unspecified = Self(kind: .unspecified)

    /// The URL was retrieved successfully.
    public static let success = Self(kind: .success)

    /// The URL could not be retrieved.
    public static let error = Self(kind: .error)

    /// The URL could not be retrieved because its content is behind a paywall.
    public static let paywall = Self(kind: .paywall)

    /// The URL could not be retrieved because its content is unsafe.
    public static let unsafe = Self(kind: .unsafe)

    /// The raw string representation of this `URLRetrievalStatus` value.
    public let rawValue: String

    /// The log message code associated with unrecognized status values.
    static let unrecognizedValueMessageCode =
      AILog.MessageCode.urlMetadataUnrecognizedURLRetrievalStatus
  }

  /// The retrieved URL.
  ///
  /// `nil` when decoding found no URL string or the string could not be parsed as a `URL`.
  public let retrievedURL: URL?

  /// The status of the URL retrieval.
  public let retrievalStatus: URLRetrievalStatus
}
58+
59+
// MARK: - Codable Conformances
60+
61+
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension URLMetadata: Decodable {
  enum CodingKeys: String, CodingKey {
    case retrievedURL = "retrievedUrl"
    case retrievalStatus = "urlRetrievalStatus"
  }

  /// Decodes a single URL metadata entry from `decoder`.
  ///
  /// - Throws: Any error raised while obtaining the keyed container or decoding a present value.
  public init(from decoder: any Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)

    // A missing URL string, or one that `URL(string:)` rejects, is stored as `nil`.
    let urlString = try values.decodeIfPresent(String.self, forKey: .retrievedURL)
    retrievedURL = urlString.flatMap { URL(string: $0) }

    // The unspecified status is handed to `AILog.safeUnwrap` as the fallback for an absent
    // status value.
    let decodedStatus = try values.decodeIfPresent(
      URLRetrievalStatus.self, forKey: .retrievalStatus
    )
    retrievalStatus = AILog.safeUnwrap(
      decodedStatus, fallback: URLRetrievalStatus.unspecified
    )
  }
}

FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,33 @@ struct GenerateContentIntegrationTests {
424424
}
425425
}
426426

427+
/// Verifies that a request made with the URL context tool reports metadata for the single blog
/// post URL named in the prompt, with a successful retrieval status.
@Test(
  "generateContent with URL Context",
  arguments: InstanceConfig.allConfigs
)
func generateContent_withURLContext_succeeds(_ config: InstanceConfig) async throws {
  // The same URL is sent in the prompt and expected back in the metadata.
  let blogPostURL = "https://developers.googleblog.com/en/introducing-gemma-3-270m/"
  let prompt = "Write a one paragraph summary of this blog post: \(blogPostURL)"
  let model = FirebaseAI.componentInstance(config).generativeModel(
    modelName: ModelNames.gemini2_5_Flash,
    tools: [.urlContext()]
  )

  let response = try await model.generateContent(prompt)

  let firstCandidate = try #require(response.candidates.first)
  let contextMetadata = try #require(firstCandidate.urlContextMetadata)
  #expect(contextMetadata.urlMetadata.count == 1)
  let entry = try #require(contextMetadata.urlMetadata.first)
  let retrievedURL = try #require(entry.retrievedURL)
  #expect(
    retrievedURL == URL(string: blogPostURL)
  )
  #expect(entry.retrievalStatus == .success)
}
453+
427454
@Test(arguments: InstanceConfig.allConfigs)
428455
func generateContent_codeExecution_succeeds(_ config: InstanceConfig) async throws {
429456
let model = FirebaseAI.componentInstance(config).generativeModel(

0 commit comments

Comments
 (0)