diff --git a/apps/postgres-new/app/api/chat/route.ts b/apps/postgres-new/app/api/chat/route.ts
index ae5f22a0..89faf26d 100644
--- a/apps/postgres-new/app/api/chat/route.ts
+++ b/apps/postgres-new/app/api/chat/route.ts
@@ -1,7 +1,7 @@
 import { openai } from '@ai-sdk/openai'
 import { ToolInvocation, convertToCoreMessages, streamText } from 'ai'
 import { codeBlock } from 'common-tags'
-import { convertToCoreTools, maxRowLimit, tools } from '~/lib/tools'
+import { convertToCoreTools, maxMessageContext, maxRowLimit, tools } from '~/lib/tools'
 
 // Allow streaming responses up to 30 seconds
 export const maxDuration = 30
@@ -15,6 +15,9 @@ type Message = {
 export async function POST(req: Request) {
   const { messages }: { messages: Message[] } = await req.json()
 
+  // Trim the message context sent to the LLM to mitigate token abuse
+  const trimmedMessageContext = messages.slice(-maxMessageContext)
+
   const result = await streamText({
     system: codeBlock`
       You are a helpful database assistant. Under the hood you have access to an in-browser Postgres database called PGlite (https://github.com/electric-sql/pglite).
@@ -59,7 +62,7 @@ export async function POST(req: Request) {
       Feel free to suggest corrections for suspected typos.
     `,
     model: openai('gpt-4o-2024-08-06'),
-    messages: convertToCoreMessages(messages),
+    messages: convertToCoreMessages(trimmedMessageContext),
     tools: convertToCoreTools(tools),
   })
 
diff --git a/apps/postgres-new/lib/tools.ts b/apps/postgres-new/lib/tools.ts
index 750e0fb7..46a301b9 100644
--- a/apps/postgres-new/lib/tools.ts
+++ b/apps/postgres-new/lib/tools.ts
@@ -21,6 +21,11 @@ function result<T extends z.ZodTypeAny>(schema: T) {
  */
 export const maxRowLimit = 100
 
+/**
+ * The maximum number of most recent messages from the chat history to send to the LLM.
+ */
+export const maxMessageContext = 30
+
 /**
  * Central location for all LLM tools including their
  * description, arg schema, and result schema.