inference-gateway · edenreich · Apr 26, 2025 · Apr 25, 2025 · Apr 25, 2025
diff --git a/README.md b/README.md
@@ -155,11 +155,11 @@ const client = new InferenceGatewayClient({
 try {
   await client.streamChatCompletion(
     {
-      model: 'gpt-4o',
+      model: 'openai/gpt-4o',
       messages: [
         {
           role: MessageRole.User,
-          content: 'What's the weather in San Francisco?',
+          content: "What's the weather in San Francisco?",
         },
       ],
       tools: [
@@ -186,10 +186,14 @@ try {
         console.log('Tool call:', toolCall.function.name);
         console.log('Arguments:', toolCall.function.arguments);
       },
-      onContent: (content) => process.stdout.write(content),
+      onReasoning: (reasoning) => {
+        console.log('Reasoning:', reasoning);
+      },
+      onContent: (content) => {
+        console.log('Content:', content);
+      },
       onFinish: () => console.log('\nStream completed'),
-    },
-    Provider.OpenAI
+    }
   );
 } catch (error) {
   console.error('Error:', error);

diff --git a/src/client.ts b/src/client.ts
@@ -12,6 +12,7 @@ import { ChatCompletionToolType } from './types/generated';
 interface ChatCompletionStreamCallbacks {
   onOpen?: () => void;
   onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
+  onReasoning?: (reasoningContent: string) => void;
   onContent?: (content: string) => void;
   onTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
   onFinish?: (
@@ -257,6 +258,12 @@ export class InferenceGatewayClient {
                 JSON.parse(data);
               callbacks.onChunk?.(chunk);
 
+              // Forward only non-empty reasoning text; null or '' deltas are
+              // skipped, mirroring the truthiness check on `content` below.
+              const reasoningContent =
+                chunk.choices[0]?.delta?.reasoning_content;
+              if (reasoningContent) callbacks.onReasoning?.(reasoningContent);
+
               const content = chunk.choices[0]?.delta?.content;
               if (content) {
                 callbacks.onContent?.(content);

diff --git a/tests/client.test.ts b/tests/client.test.ts
@@ -263,6 +263,68 @@ describe('InferenceGatewayClient', () => {
       );
     });
 
+    it('should handle streaming chat completions reasoning and content', async () => {
+      // Reasoning deltas must reach onReasoning and text deltas onContent,
+      // each in stream order; empty-string content deltas are not reported.
+      const mockRequest = {
+        model: 'gpt-4o',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+        stream: true,
+      };
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onReasoning: jest.fn(),
+        onContent: jest.fn(),
+        onFinish: jest.fn(),
+      };
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+      // One role chunk, five reasoning chunks, two content chunks, then [DONE].
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+      await writer.close();
+      await streamPromise;
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      // [DONE] is a sentinel, not a chunk, so only the eight JSON events count.
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
+      expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
+      expect(callbacks.onReasoning).toHaveBeenNthCalledWith(1, 'This');
+      expect(callbacks.onReasoning).toHaveBeenNthCalledWith(2, ' is');
+      expect(callbacks.onReasoning).toHaveBeenNthCalledWith(3, ' a');
+      expect(callbacks.onReasoning).toHaveBeenNthCalledWith(4, ' reasoning');
+      expect(callbacks.onReasoning).toHaveBeenNthCalledWith(5, ' content');
+      expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+      expect(callbacks.onContent).toHaveBeenNthCalledWith(1, 'Hello');
+      expect(callbacks.onContent).toHaveBeenNthCalledWith(2, '!');
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      // The request must be POSTed unchanged to the chat completions endpoint.
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify(mockRequest),
+        })
+      );
+    });
+
     it('should handle tool calls in streaming chat completions', async () => {
       const mockRequest = {
         model: 'gpt-4o',