diff --git a/apps/postgres-new/app/api/chat/route.ts b/apps/postgres-new/app/api/chat/route.ts
index ae5f22a0..89faf26d 100644
--- a/apps/postgres-new/app/api/chat/route.ts
+++ b/apps/postgres-new/app/api/chat/route.ts
@@ -1,7 +1,7 @@
 import { openai } from '@ai-sdk/openai'
 import { ToolInvocation, convertToCoreMessages, streamText } from 'ai'
 import { codeBlock } from 'common-tags'
-import { convertToCoreTools, maxRowLimit, tools } from '~/lib/tools'
+import { convertToCoreTools, maxMessageContext, maxRowLimit, tools } from '~/lib/tools'
 
 // Allow streaming responses up to 30 seconds
 export const maxDuration = 30
@@ -15,6 +15,9 @@ type Message = {
 export async function POST(req: Request) {
   const { messages }: { messages: Message[] } = await req.json()
 
+  // Keep only the last `maxMessageContext` messages from the request body to mitigate token abuse
+  const trimmedMessageContext = messages.slice(-maxMessageContext)
+
   const result = await streamText({
     system: codeBlock`
       You are a helpful database assistant. Under the hood you have access to an in-browser Postgres database called PGlite (https://github.com/electric-sql/pglite).
@@ -59,7 +62,7 @@ export async function POST(req: Request) {
       Feel free to suggest corrections for suspected typos.
     `,
     model: openai('gpt-4o-2024-08-06'),
-    messages: convertToCoreMessages(messages),
+    messages: convertToCoreMessages(trimmedMessageContext),
     tools: convertToCoreTools(tools),
   })
 
diff --git a/apps/postgres-new/lib/tools.ts b/apps/postgres-new/lib/tools.ts
index 750e0fb7..46a301b9 100644
--- a/apps/postgres-new/lib/tools.ts
+++ b/apps/postgres-new/lib/tools.ts
@@ -21,6 +21,11 @@ function result(schema: T) {
  */
 export const maxRowLimit = 100
 
+/**
+ * The maximum number of most-recent chat history messages to send to the LLM.
+ */
+export const maxMessageContext = 30
+
 /**
  * Central location for all LLM tools including their
  * description, arg schema, and result schema.