Commit ab05f48

MarcMcIntosh and alashchev17 authored and committed
fix: limit the number of tokens a chat can use.
1 parent 0b99b8a commit ab05f48

4 files changed: +132 -1 lines changed

refact-agent/gui/src/components/ChatForm/ChatForm.tsx

Lines changed: 22 additions & 1 deletion
@@ -19,6 +19,7 @@ import {
   useSendChatRequest,
   useCompressChat,
   useAutoFocusOnce,
+  useTotalTokenUsage,
 } from "../../hooks";
 import { ErrorCallout, Callout } from "../Callout";
 import { ComboBox } from "../ComboBox";
@@ -34,6 +35,7 @@ import { useInputValue } from "./useInputValue";
 import {
   clearInformation,
   getInformationMessage,
+  setInformation,
 } from "../../features/Errors/informationSlice";
 import { InformationCallout } from "../Callout/Callout";
 import { ToolConfirmation } from "./ToolConfirmation";
@@ -96,6 +98,16 @@ export const ChatForm: React.FC<ChatFormProps> = ({
   const { compressChat, compressChatRequest } = useCompressChat();
   const autoFocus = useAutoFocusOnce();
 
+  const { limitReached, tokens, limit } = useTotalTokenUsage();
+
+  useEffect(() => {
+    if (limitReached && !information) {
+      setInformation(
+        `Token Limit reached, ${tokens} out of ${limit} used. To continue click the compress button or start a new chat.`,
+      );
+    }
+  }, [tokens, limit, limitReached, information]);
+
   const shouldAgentCapabilitiesBeShown = useMemo(() => {
     return threadToolUse === "agent";
   }, [threadToolUse]);
@@ -117,6 +129,7 @@ export const ChatForm: React.FC<ChatFormProps> = ({
   const disableSend = useMemo(() => {
     // TODO: if interrupting chat some errors can occur
     if (allDisabled) return true;
+    if (limitReached) return true;
     // if (
     //   currentThreadMaximumContextTokens &&
     //   currentThreadUsage?.prompt_tokens &&
@@ -126,7 +139,15 @@ export const ChatForm: React.FC<ChatFormProps> = ({
     // if (arePromptTokensBiggerThanContext) return true;
     if (messages.length === 0) return false;
     return isWaiting || isStreaming || !isOnline || preventSend;
-  }, [isOnline, isStreaming, isWaiting, preventSend, messages, allDisabled]);
+  }, [
+    allDisabled,
+    limitReached,
+    messages.length,
+    isWaiting,
+    isStreaming,
+    isOnline,
+    preventSend,
+  ]);
 
   const { processAndInsertImages } = useAttachedImages();
   const handlePastingFile = useCallback(
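
For context on the three names imported from ../../features/Errors/informationSlice in the hunk above (setInformation, clearInformation, getInformationMessage), here is a minimal sketch of what such a slice could look like, assuming a standard Redux Toolkit setup. The state shape, field names, and selector signature are assumptions for illustration only; the actual slice is not part of this commit.

// Hypothetical sketch of features/Errors/informationSlice; only the three
// exported names are taken from the diff above, everything else is assumed.
import { createSlice, PayloadAction } from "@reduxjs/toolkit";

type InformationState = { message: string | null };

const initialState: InformationState = { message: null };

const informationSlice = createSlice({
  name: "information",
  initialState,
  reducers: {
    // Stores the message shown in the InformationCallout, e.g. the token-limit notice.
    setInformation: (state, action: PayloadAction<string>) => {
      state.message = action.payload;
    },
    // Clears the callout, e.g. after the chat is compressed or reset.
    clearInformation: (state) => {
      state.message = null;
    },
  },
});

export const { setInformation, clearInformation } = informationSlice.actions;

// Selector signature assumes the slice is mounted under state.information.
export const getInformationMessage = (state: { information: InformationState }) =>
  state.information.message;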

refact-agent/gui/src/hooks/index.ts

Lines changed: 1 addition & 0 deletions
@@ -34,3 +34,4 @@ export * from "./useResizeObserver";
 export * from "./useCompressChat";
 export * from "./useAutoFocusOnce";
 export * from "./useHideScroll";
+export * from "./useTotalTokenUsage";

refact-agent/gui/src/hooks/useSendChatRequest.ts

Lines changed: 3 additions & 0 deletions
@@ -56,6 +56,7 @@ import {
 
 import { v4 as uuidv4 } from "uuid";
 import { upsertToolCallIntoHistory } from "../features/History/historySlice";
+import { useTotalTokenUsage } from "./useTotalTokenUsage";
 
 type SubmitHandlerParams =
   | {
@@ -355,6 +356,7 @@ export function useAutoSend() {
   const sendImmediately = useAppSelector(selectSendImmediately);
   const wasInteracted = useAppSelector(getToolsInteractionStatus); // shows if tool confirmation popup was interacted by user
   const areToolsConfirmed = useAppSelector(getToolsConfirmationStatus);
+  const { limitReached } = useTotalTokenUsage();
   const { sendMessages, abort, messagesWithSystemPrompt } =
     useSendChatRequest();
   // TODO: make a selector for this, or show tool formation
@@ -379,6 +381,7 @@ export function useAutoSend() {
     const lastMessage = currentMessages.slice(-1)[0];
     // here ish
     if (
+      !limitReached &&
      isAssistantMessage(lastMessage) &&
      lastMessage.tool_calls &&
      lastMessage.tool_calls.length > 0
refact-agent/gui/src/hooks/useTotalTokenUsage.ts

Lines changed: 106 additions & 0 deletions

@@ -0,0 +1,106 @@
+import { useMemo } from "react";
+import { useAppSelector } from "./useAppSelector";
+import { isAssistantMessage } from "../events";
+import { selectMessages } from "../features/Chat";
+import {
+  CompletionTokenDetails,
+  PromptTokenDetails,
+  type Usage,
+} from "../services/refact/chat";
+import { calculateUsageInputTokens } from "../utils/calculateUsageInputTokens";
+
+const TOKEN_LIMIT = 200_000;
+export function useTotalTokenUsage() {
+  const messages = useAppSelector(selectMessages);
+
+  const summedUsages = useMemo(() => {
+    return messages.reduce<Usage | null>((acc, message) => {
+      if (acc && isAssistantMessage(message) && message.usage) {
+        return sumUsages(acc, message.usage);
+      } else if (isAssistantMessage(message) && message.usage) {
+        return message.usage;
+      }
+      return acc;
+    }, null);
+  }, [messages]);
+
+  const tokens = useMemo(() => {
+    if (!summedUsages) return 0;
+    return calculateUsageInputTokens({
+      keys: [
+        "prompt_tokens",
+        "cache_creation_input_tokens",
+        "cache_read_input_tokens",
+        "completion_tokens",
+      ],
+      usage: summedUsages,
+    });
+  }, [summedUsages]);
+
+  const limitReached = useMemo(() => {
+    return tokens >= TOKEN_LIMIT;
+  }, [tokens]);
+
+  return {
+    summedUsages,
+    tokens,
+    limitReached,
+    limit: TOKEN_LIMIT,
+  };
+}
+
+function addCompletionDetails(
+  a: CompletionTokenDetails | null,
+  b: CompletionTokenDetails | null,
+): CompletionTokenDetails | null {
+  if (!a && !b) return null;
+  if (!a) return b;
+  if (!b) return a;
+
+  return {
+    accepted_prediction_tokens:
+      a.accepted_prediction_tokens + b.accepted_prediction_tokens,
+    audio_tokens: a.audio_tokens + b.audio_tokens,
+    reasoning_tokens: a.reasoning_tokens + b.reasoning_tokens,
+    rejected_prediction_tokens:
+      a.rejected_prediction_tokens + b.rejected_prediction_tokens,
+  };
+}
+
+function addPromptTokenDetails(
+  a: PromptTokenDetails | null,
+  b: PromptTokenDetails | null,
+): PromptTokenDetails | null {
+  if (!a && !b) return null;
+  if (!a) return b;
+  if (!b) return a;
+
+  return {
+    audio_tokens: a.audio_tokens + b.audio_tokens,
+    cached_tokens: a.cached_tokens + b.cached_tokens,
+  };
+}
+
+function sumUsages(a: Usage, b: Usage): Usage {
+  const completionDetails = addCompletionDetails(
+    a.completion_tokens_details,
+    b.completion_tokens_details,
+  );
+  const promptDetails = addPromptTokenDetails(
+    a.prompt_tokens_details,
+    b.prompt_tokens_details,
+  );
+
+  return {
+    completion_tokens: a.completion_tokens + b.completion_tokens,
+    prompt_tokens: a.prompt_tokens + b.prompt_tokens,
+    total_tokens: a.total_tokens + b.total_tokens,
+    completion_tokens_details: completionDetails,
+    prompt_tokens_details: promptDetails,
+    cache_creation_input_tokens:
+      (a.cache_creation_input_tokens || 0) +
+      (b.cache_creation_input_tokens || 0),
+    cache_read_input_tokens:
+      (a.cache_read_input_tokens || 0) + (b.cache_read_input_tokens || 0),
+  };
+}
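
As a usage sketch, the following hypothetical component shows how the hook's return value (summedUsages, tokens, limitReached, limit) could be consumed outside ChatForm. The TokenUsageBanner name, its onCompress prop, and the import path are illustrative assumptions, not part of this commit.

import React from "react";
// Import path assumes a component living under src/components; adjust as needed.
import { useTotalTokenUsage } from "../../hooks";

// Hypothetical banner that surfaces the token-limit state exposed by the hook.
export const TokenUsageBanner: React.FC<{ onCompress: () => void }> = ({
  onCompress,
}) => {
  const { tokens, limit, limitReached } = useTotalTokenUsage();

  // Nothing to show while the chat is under the 200_000-token cap.
  if (!limitReached) return null;

  return (
    <div role="alert">
      <span>
        Token limit reached: {tokens} of {limit} tokens used.
      </span>
      {/* Compressing the chat or starting a new one is the way to continue,
          since ChatForm's disableSend returns true while limitReached is set. */}
      <button onClick={onCompress}>Compress chat</button>
    </div>
  );
};

Because the hook derives its totals from selectMessages with useMemo, any component rendering it sees the same numbers as ChatForm without extra wiring.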
