
Commit 41e3c4c

test(ai): Add gemini-2.5-flash to integration tests (#9110)
Added gemini-2.5-flash to our integration tests now that it's publicly available. The token counts differ slightly between 2.0-flash and 2.5-flash, so I introduced conditionals when checking token counts.
1 parent b97eab3 commit 41e3c4c
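
The whole change follows one pattern: branch on the model name, then assert token counts against model-specific expected values. A condensed sketch of that pattern (the expectedTokens helper is hypothetical, invented here for illustration; the actual tests inline these branches, as the diffs below show):

// Hypothetical helper illustrating the per-model branching used in the tests below.
// Numbers mirror the chat test's first turn; the commit itself inlines these branches.
interface TokenExpectations {
  promptTokenCount: number;
  candidatesTokenCount: number;
  totalTokenCount: number;
}

function expectedTokens(modelName: string): TokenExpectations | undefined {
  if (modelName.includes('gemini-2.5-flash')) {
    // 2.5-flash reports higher totals, plausibly because thinking tokens
    // are billed into totalTokenCount (an assumption, not stated in the commit).
    return { promptTokenCount: 17, candidatesTokenCount: 8, totalTokenCount: 49 };
  } else if (modelName.includes('gemini-2.0-flash')) {
    return { promptTokenCount: 15, candidatesTokenCount: 8, totalTokenCount: 23 };
  }
  return undefined; // Unknown model: make no token-count assertions.
}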

File tree

3 files changed: +129 -69 lines

packages/ai/integration/chat.test.ts
packages/ai/integration/constants.ts
packages/ai/integration/generate-content.test.ts


packages/ai/integration/chat.test.ts

Lines changed: 67 additions & 38 deletions
@@ -76,56 +76,85 @@ describe('Chat Session', () => {
         'What is the capital of France?'
       );
       const response1 = result1.response;
-      expect(response1.text().trim().toLowerCase()).to.include('paris');
+      const result2 = await chat.sendMessage('And what about Italy?');
+      const response2 = result2.response;
+      const history = await chat.getHistory();
 
-      let history = await chat.getHistory();
-      expect(history.length).to.equal(2);
+      expect(response1.text().trim().toLowerCase()).to.include('paris');
+      expect(response1.usageMetadata).to.not.be.null;
+      expect(response2.text().trim().toLowerCase()).to.include('rome');
+      expect(response2.usageMetadata).to.not.be.null;
+      expect(history.length).to.equal(4);
       expect(history[0].role).to.equal('user');
       expect(history[0].parts[0].text).to.equal(
         'What is the capital of France?'
       );
       expect(history[1].role).to.equal('model');
       expect(history[1].parts[0].text?.toLowerCase()).to.include('paris');
-
-      expect(response1.usageMetadata).to.not.be.null;
-      // Token counts can vary slightly in chat context
-      expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
-        15, // "What is the capital of France?" + system instruction
-        TOKEN_COUNT_DELTA + 2 // More variance for chat context
-      );
-      expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        8, // "Paris"
-        TOKEN_COUNT_DELTA
-      );
-      expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
-        23, // "What is the capital of France?" + system instruction + "Paris"
-        TOKEN_COUNT_DELTA + 3 // More variance for chat context
-      );
-
-      const result2 = await chat.sendMessage('And what about Italy?');
-      const response2 = result2.response;
-      expect(response2.text().trim().toLowerCase()).to.include('rome');
-
-      history = await chat.getHistory();
-      expect(history.length).to.equal(4);
       expect(history[2].role).to.equal('user');
       expect(history[2].parts[0].text).to.equal('And what about Italy?');
       expect(history[3].role).to.equal('model');
       expect(history[3].parts[0].text?.toLowerCase()).to.include('rome');
 
-      expect(response2.usageMetadata).to.not.be.null;
-      expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
-        28, // History + "And what about Italy?" + system instruction
-        TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
-      );
-      expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        8,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
-        36,
-        TOKEN_COUNT_DELTA
-      );
+      if (model.model.includes('gemini-2.5-flash')) {
+        // Token counts can vary slightly in chat context
+        expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
+          17, // "What is the capital of France?" + system instruction
+          TOKEN_COUNT_DELTA + 2 // More variance for chat context
+        );
+        expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8, // "Paris"
+          TOKEN_COUNT_DELTA
+        );
+        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
+          49, // "What is the capital of France?" + system instruction + "Paris"
+          TOKEN_COUNT_DELTA + 3 // More variance for chat context
+        );
+        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
+          49, // "What is the capital of France?" + system instruction + "Paris"
+          TOKEN_COUNT_DELTA + 3 // More variance for chat context
+        );
+
+        expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
+          32, // History + "And what about Italy?" + system instruction
+          TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
+        );
+        expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
+          68,
+          TOKEN_COUNT_DELTA + 2
+        );
+      } else if (model.model.includes('gemini-2.0-flash')) {
+        expect(response1.usageMetadata).to.not.be.null;
+        // Token counts can vary slightly in chat context
+        expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
+          15, // "What is the capital of France?" + system instruction
+          TOKEN_COUNT_DELTA + 2 // More variance for chat context
+        );
+        expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8, // "Paris"
+          TOKEN_COUNT_DELTA
+        );
+        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
+          23, // "What is the capital of France?" + system instruction + "Paris"
+          TOKEN_COUNT_DELTA + 3 // More variance for chat context
+        );
+        expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
+          28, // History + "And what about Italy?" + system instruction
+          TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
+        );
+        expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
+          36,
+          TOKEN_COUNT_DELTA
+        );
+      }
     });
   });
 });
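
Every count assertion above uses closeTo with TOKEN_COUNT_DELTA plus per-case padding rather than exact equality, since reported token counts drift slightly between runs. The constant is imported from integration/constants.ts and its value is not visible in this diff; a hypothetical definition, only to make the tolerance concrete:

// Hypothetical value: the real constant lives in
// packages/ai/integration/constants.ts and is not shown in this commit.
export const TOKEN_COUNT_DELTA = 5;

// closeTo(expected, delta) passes when |actual - expected| <= delta, so
// promptTokenCount asserted as closeTo(17, TOKEN_COUNT_DELTA + 2) would
// accept any value in [10, 24] under this assumed delta.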

packages/ai/integration/constants.ts

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ const backendNames: Map<BackendType, string> = new Map([
   [BackendType.VERTEX_AI, 'Vertex AI']
 ]);
 
-const modelNames: readonly string[] = ['gemini-2.0-flash'];
+const modelNames: readonly string[] = ['gemini-2.0-flash', 'gemini-2.5-flash'];
 
 /**
  * Array of test configurations that is iterated over to get full coverage
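
The doc comment cut off above describes testConfigs, the array iterated by both test files; each test reads model.model, which is how the model names added here reach the per-model branches. A sketch of how such a backend-by-model cross product could be assembled (the actual construction sits below this hunk and is not shown, so the shapes here are assumptions):

// Sketch only: the real testConfigs construction in constants.ts may differ.
interface TestConfig {
  backend: BackendType;
  model: string;
  toString(): string;
}

const testConfigs: readonly TestConfig[] = Array.from(backendNames.keys()).flatMap(
  backend =>
    modelNames.map(model => ({
      backend,
      model,
      toString: () => `${backendNames.get(backend)} ${model}`
    }))
);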

packages/ai/integration/generate-content.test.ts

Lines changed: 61 additions & 30 deletions
@@ -81,36 +81,67 @@ describe('Generate Content', () => {
       expect(trimmedText).to.equal('Mountain View');
 
       expect(response.usageMetadata).to.not.be.null;
-      expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
-        21,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        4,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
-        25,
-        TOKEN_COUNT_DELTA * 2
-      );
-      expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
-      expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(1);
-      expect(
-        response.usageMetadata!.promptTokensDetails![0].modality
-      ).to.equal(Modality.TEXT);
-      expect(
-        response.usageMetadata!.promptTokensDetails![0].tokenCount
-      ).to.equal(21);
-      expect(response.usageMetadata!.candidatesTokensDetails).to.not.be.null;
-      expect(
-        response.usageMetadata!.candidatesTokensDetails!.length
-      ).to.equal(1);
-      expect(
-        response.usageMetadata!.candidatesTokensDetails![0].modality
-      ).to.equal(Modality.TEXT);
-      expect(
-        response.usageMetadata!.candidatesTokensDetails![0].tokenCount
-      ).to.be.closeTo(4, TOKEN_COUNT_DELTA);
+
+      if (model.model.includes('gemini-2.5-flash')) {
+        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
+          22,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          2,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
+          55,
+          TOKEN_COUNT_DELTA * 2
+        );
+        expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
+        expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
+          1
+        );
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].tokenCount
+        ).to.closeTo(22, TOKEN_COUNT_DELTA);
+
+        // candidatesTokenDetails comes back about half the time, so let's just not test it.
+      } else if (model.model.includes('gemini-2.0-flash')) {
+        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
+          21,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          4,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
+          25,
+          TOKEN_COUNT_DELTA * 2
+        );
+        expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
+        expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
+          1
+        );
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].tokenCount
+        ).to.equal(21);
+        expect(response.usageMetadata!.candidatesTokensDetails).to.not.be
+          .null;
+        expect(
+          response.usageMetadata!.candidatesTokensDetails!.length
+        ).to.equal(1);
+        expect(
+          response.usageMetadata!.candidatesTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.candidatesTokensDetails![0].tokenCount
+        ).to.be.closeTo(4, TOKEN_COUNT_DELTA);
+      }
     });
 
     it('generateContentStream: text input, text output', async () => {
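
For gemini-2.5-flash the new branch skips candidatesTokensDetails entirely because, per the inline comment, it only comes back about half the time. An alternative would be to assert on it only when it is present; a hedged sketch of that option (not what the commit does), using only names already in scope in this test:

// Sketch: validate candidatesTokensDetails only when the backend returns it.
// The commit instead omits the check entirely for gemini-2.5-flash.
const details = response.usageMetadata!.candidatesTokensDetails;
if (details && details.length > 0) {
  expect(details[0].modality).to.equal(Modality.TEXT);
  expect(details[0].tokenCount).to.be.closeTo(2, TOKEN_COUNT_DELTA);
}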
