From 9813355d4bb281db616ada45bf95bff90e6db2d8 Mon Sep 17 00:00:00 2001
From: waldekmastykarz
Date: Sun, 13 Jul 2025 12:48:24 +0200
Subject: [PATCH 1/3] Adds LanguageModelRateLimitingPlugin. Closes #1309

---
 .../LanguageModel/OpenAIModels.cs             |   8 +-
 ...geModelRateLimitingCustomResponseLoader.cs |  40 ++
 .../LanguageModelRateLimitingPlugin.cs        | 349 ++++++++++++++++++
 ...itingplugin.customresponsefile.schema.json |  46 +++
 ...anguagemodelratelimitingplugin.schema.json |  43 +++
 5 files changed, 482 insertions(+), 4 deletions(-)
 create mode 100644 DevProxy.Plugins/Behavior/LanguageModelRateLimitingCustomResponseLoader.cs
 create mode 100644 DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
 create mode 100644 schemas/v1.0.0/languagemodelratelimitingplugin.customresponsefile.schema.json
 create mode 100644 schemas/v1.0.0/languagemodelratelimitingplugin.schema.json

diff --git a/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs b/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs
index fdf7c029..861a1d1f 100644
--- a/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs
+++ b/DevProxy.Abstractions/LanguageModel/OpenAIModels.cs
@@ -6,7 +6,7 @@
 
 namespace DevProxy.Abstractions.LanguageModel;
 
-public abstract class OpenAIRequest
+public class OpenAIRequest
 {
     [JsonPropertyName("frequency_penalty")]
     public long? FrequencyPenalty { get; set; }
@@ -22,7 +22,7 @@ public abstract class OpenAIRequest
     public double? TopP { get; set; }
 }
 
-public abstract class OpenAIResponse : ILanguageModelCompletionResponse
+public class OpenAIResponse : ILanguageModelCompletionResponse
 {
     public long Created { get; set; }
     public OpenAIError? Error { get; set; }
@@ -37,12 +37,12 @@ public abstract class OpenAIResponse : ILanguageModelCompletionResponse
     public string? RequestUrl { get; set; }
     public string? ErrorMessage => Error?.Message;
 
-    public abstract string? Response { get; }
+    public virtual string? Response { get; }
 
     public OpenAIResponse ConvertToOpenAIResponse() => this;
 }
 
-public abstract class OpenAIResponse<TChoice> : OpenAIResponse
+public class OpenAIResponse<TChoice> : OpenAIResponse
 {
     public IEnumerable<TChoice>? Choices { get; set; }
 }
diff --git a/DevProxy.Plugins/Behavior/LanguageModelRateLimitingCustomResponseLoader.cs b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingCustomResponseLoader.cs
new file mode 100644
index 00000000..cb5f3860
--- /dev/null
+++ b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingCustomResponseLoader.cs
@@ -0,0 +1,40 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using DevProxy.Abstractions.Utils;
+using DevProxy.Abstractions.Proxy;
+using DevProxy.Abstractions.Plugins;
+using Microsoft.Extensions.Logging;
+using System.Text.Json;
+using DevProxy.Abstractions.Models;
+
+namespace DevProxy.Plugins.Behavior;
+
+internal sealed class LanguageModelRateLimitingCustomResponseLoader(
+    HttpClient httpClient,
+    ILogger<LanguageModelRateLimitingCustomResponseLoader> logger,
+    LanguageModelRateLimitConfiguration configuration,
+    IProxyConfiguration proxyConfiguration) :
+    BaseLoader(httpClient, logger, proxyConfiguration)
+{
+    private readonly LanguageModelRateLimitConfiguration _configuration = configuration;
+
+    protected override string FilePath => _configuration.CustomResponseFile;
+
+    protected override void LoadData(string fileContents)
+    {
+        try
+        {
+            var response = JsonSerializer.Deserialize<MockResponseResponse>(fileContents, ProxyUtils.JsonSerializerOptions);
+            if (response is not null)
+            {
+                _configuration.CustomResponse = response;
+            }
+        }
+        catch (Exception ex)
+        {
+            Logger.LogError(ex, "An error has occurred while reading {ConfigurationFile}:", _configuration.CustomResponseFile);
+        }
+    }
+}
diff --git a/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
new file mode 100644
index 00000000..3c7f02c3
--- /dev/null
+++ b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
@@ -0,0 +1,349 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using DevProxy.Abstractions.LanguageModel;
+using DevProxy.Abstractions.Models;
+using DevProxy.Abstractions.Plugins;
+using DevProxy.Abstractions.Proxy;
+using DevProxy.Abstractions.Utils;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using System.Globalization;
+using System.Net;
+using System.Text.Json;
+using Titanium.Web.Proxy.Http;
+using Titanium.Web.Proxy.Models;
+
+namespace DevProxy.Plugins.Behavior;
+
+public enum TokenLimitResponseWhenExceeded
+{
+    Throttle,
+    Custom
+}
+
+public sealed class LanguageModelRateLimitConfiguration
+{
+    public MockResponseResponse? CustomResponse { get; set; }
+    public string CustomResponseFile { get; set; } = "token-limit-response.json";
+    public string HeaderRetryAfter { get; set; } = "retry-after";
+    public int ResetTimeWindowSeconds { get; set; } = 60; // 1 minute
+    public int PromptTokenLimit { get; set; } = 5000;
+    public int CompletionTokenLimit { get; set; } = 5000;
+    public TokenLimitResponseWhenExceeded WhenLimitExceeded { get; set; } = TokenLimitResponseWhenExceeded.Throttle;
+}
+
+public sealed class LanguageModelRateLimitingPlugin(
+    HttpClient httpClient,
+    ILogger<LanguageModelRateLimitingPlugin> logger,
+    ISet<UrlToWatch> urlsToWatch,
+    IProxyConfiguration proxyConfiguration,
+    IConfigurationSection pluginConfigurationSection) :
+    BasePlugin<LanguageModelRateLimitConfiguration>(
+        httpClient,
+        logger,
+        urlsToWatch,
+        proxyConfiguration,
+        pluginConfigurationSection)
+{
+    // sentinel values; they let us detect the first intercepted request
+    // and initialize the counters from configuration
+    private int _promptTokensRemaining = -1;
+    private int _completionTokensRemaining = -1;
+    private DateTime _resetTime = DateTime.MinValue;
+    private LanguageModelRateLimitingCustomResponseLoader? _loader;
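+
+    // Note: token budgets are shared across all matched requests and refill in
+    // full once the reset window elapses (fixed-window rate limiting).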
+
+    public override string Name => nameof(LanguageModelRateLimitingPlugin);
+
+    public override async Task InitializeAsync(InitArgs e, CancellationToken cancellationToken)
+    {
+        ArgumentNullException.ThrowIfNull(e);
+
+        await base.InitializeAsync(e, cancellationToken);
+
+        if (Configuration.WhenLimitExceeded == TokenLimitResponseWhenExceeded.Custom)
+        {
+            Configuration.CustomResponseFile = ProxyUtils.GetFullPath(Configuration.CustomResponseFile, ProxyConfiguration.ConfigFile);
+            _loader = ActivatorUtilities.CreateInstance<LanguageModelRateLimitingCustomResponseLoader>(e.ServiceProvider, Configuration);
+            await _loader.InitFileWatcherAsync(cancellationToken);
+        }
+    }
+
+    public override Task BeforeRequestAsync(ProxyRequestArgs e, CancellationToken cancellationToken)
+    {
+        Logger.LogTrace("{Method} called", nameof(BeforeRequestAsync));
+
+        ArgumentNullException.ThrowIfNull(e);
+
+        var session = e.Session;
+        var state = e.ResponseState;
+        if (state.HasBeenSet)
+        {
+            Logger.LogRequest("Response already set", MessageType.Skipped, new(e.Session));
+            return Task.CompletedTask;
+        }
+        if (!e.HasRequestUrlMatch(UrlsToWatch))
+        {
+            Logger.LogRequest("URL not matched", MessageType.Skipped, new(e.Session));
+            return Task.CompletedTask;
+        }
+
+        var request = e.Session.HttpClient.Request;
+        if (request.Method is null ||
+            !request.Method.Equals("POST", StringComparison.OrdinalIgnoreCase) ||
+            !request.HasBody)
+        {
+            Logger.LogRequest("Request is not a POST request with a body", MessageType.Skipped, new(e.Session));
+            return Task.CompletedTask;
+        }
+
+        if (!TryGetOpenAIRequest(request.BodyString, out var openAiRequest))
+        {
+            Logger.LogRequest("Skipping non-OpenAI request", MessageType.Skipped, new(e.Session));
+            return Task.CompletedTask;
+        }
+
+        // set the initial values for the first request
+        if (_resetTime == DateTime.MinValue)
+        {
+            _resetTime = DateTime.Now.AddSeconds(Configuration.ResetTimeWindowSeconds);
+        }
+        if (_promptTokensRemaining == -1)
+        {
+            _promptTokensRemaining = Configuration.PromptTokenLimit;
+            _completionTokensRemaining = Configuration.CompletionTokenLimit;
+        }
+
+        // see if we passed the reset time window
+        if (DateTime.Now > _resetTime)
+        {
+            _promptTokensRemaining = Configuration.PromptTokenLimit;
+            _completionTokensRemaining = Configuration.CompletionTokenLimit;
+            _resetTime = DateTime.Now.AddSeconds(Configuration.ResetTimeWindowSeconds);
+        }
+
+        // check if we have tokens available
+        if (_promptTokensRemaining <= 0 || _completionTokensRemaining <= 0)
+        {
+            Logger.LogRequest($"Exceeded token limit when calling {request.Url}. Request will be throttled", MessageType.Failed, new(e.Session));
+
+            if (Configuration.WhenLimitExceeded == TokenLimitResponseWhenExceeded.Throttle)
+            {
+                if (!e.GlobalData.TryGetValue(RetryAfterPlugin.ThrottledRequestsKey, out var value))
+                {
+                    value = new List<ThrottlerInfo>();
+                    e.GlobalData.Add(RetryAfterPlugin.ThrottledRequestsKey, value);
+                }
+
+                var throttledRequests = value as List<ThrottlerInfo>;
+                throttledRequests?.Add(new(
+                    BuildThrottleKey(request),
+                    ShouldThrottle,
+                    _resetTime
+                ));
+                ThrottleResponse(e);
+                state.HasBeenSet = true;
+            }
+            else
+            {
+                if (Configuration.CustomResponse is not null)
+                {
+                    var headersList = Configuration.CustomResponse.Headers is not null ?
+                        Configuration.CustomResponse.Headers.Select(h => new HttpHeader(h.Name, h.Value)).ToList() :
+                        [];
+
+                    var retryAfterHeader = headersList.FirstOrDefault(h => h.Name.Equals(Configuration.HeaderRetryAfter, StringComparison.OrdinalIgnoreCase));
+                    if (retryAfterHeader is not null && retryAfterHeader.Value == "@dynamic")
+                    {
+                        headersList.Add(new(Configuration.HeaderRetryAfter, ((int)(_resetTime - DateTime.Now).TotalSeconds).ToString(CultureInfo.InvariantCulture)));
+                        _ = headersList.Remove(retryAfterHeader);
+                    }
+
+                    var headers = headersList.ToArray();
+
+                    // allow custom throttling response
+                    var responseCode = (HttpStatusCode)(Configuration.CustomResponse.StatusCode ?? 200);
+                    if (responseCode == HttpStatusCode.TooManyRequests)
+                    {
+                        if (!e.GlobalData.TryGetValue(RetryAfterPlugin.ThrottledRequestsKey, out var value))
+                        {
+                            value = new List<ThrottlerInfo>();
+                            e.GlobalData.Add(RetryAfterPlugin.ThrottledRequestsKey, value);
+                        }
+
+                        var throttledRequests = value as List<ThrottlerInfo>;
+                        throttledRequests?.Add(new(
+                            BuildThrottleKey(request),
+                            ShouldThrottle,
+                            _resetTime
+                        ));
+                    }
+
+                    string body = Configuration.CustomResponse.Body is not null ?
+                        JsonSerializer.Serialize(Configuration.CustomResponse.Body, ProxyUtils.JsonSerializerOptions) :
+                        "";
+                    e.Session.GenericResponse(body, responseCode, headers);
+                    state.HasBeenSet = true;
+                }
+                else
+                {
+                    Logger.LogRequest($"Custom behavior not set. {Configuration.CustomResponseFile} not found.", MessageType.Failed, new(e.Session));
+                }
+            }
+        }
+        else
+        {
+            Logger.LogDebug("Tokens remaining - Prompt: {PromptTokensRemaining}, Completion: {CompletionTokensRemaining}", _promptTokensRemaining, _completionTokensRemaining);
+        }
+
+        return Task.CompletedTask;
+    }
+
+    public override Task BeforeResponseAsync(ProxyResponseArgs e, CancellationToken cancellationToken)
+    {
+        Logger.LogTrace("{Method} called", nameof(BeforeResponseAsync));
+
+        ArgumentNullException.ThrowIfNull(e);
+
+        if (!e.HasRequestUrlMatch(UrlsToWatch))
+        {
+            Logger.LogRequest("URL not matched", MessageType.Skipped, new(e.Session));
+            return Task.CompletedTask;
+        }
+
+        var request = e.Session.HttpClient.Request;
+        if (request.Method is null ||
+            !request.Method.Equals("POST", StringComparison.OrdinalIgnoreCase) ||
+            !request.HasBody)
+        {
+            Logger.LogDebug("Skipping non-POST request");
+            return Task.CompletedTask;
+        }
+
+        if (!TryGetOpenAIRequest(request.BodyString, out var openAiRequest))
+        {
+            Logger.LogDebug("Skipping non-OpenAI request");
+            return Task.CompletedTask;
+        }
+
+        // Read the response body to get token usage
+        var response = e.Session.HttpClient.Response;
+        if (response.HasBody)
+        {
+            var responseBody = response.BodyString;
+            if (!string.IsNullOrEmpty(responseBody))
+            {
+                try
+                {
+                    var openAiResponse = JsonSerializer.Deserialize<OpenAIResponse>(responseBody, ProxyUtils.JsonSerializerOptions);
+                    if (openAiResponse?.Usage != null)
+                    {
+                        var promptTokens = (int)openAiResponse.Usage.PromptTokens;
+                        var completionTokens = (int)openAiResponse.Usage.CompletionTokens;
+
+                        _promptTokensRemaining -= promptTokens;
+                        _completionTokensRemaining -= completionTokens;
+
+                        if (_promptTokensRemaining < 0)
+                        {
+                            _promptTokensRemaining = 0;
+                        }
+                        if (_completionTokensRemaining < 0)
+                        {
+                            _completionTokensRemaining = 0;
+                        }
+
+                        Logger.LogRequest($"Consumed {promptTokens} prompt tokens and {completionTokens} completion tokens. Remaining - Prompt: {_promptTokensRemaining}, Completion: {_completionTokensRemaining}", MessageType.Processed, new(e.Session));
+                    }
+                }
+                catch (JsonException ex)
+                {
+                    Logger.LogDebug(ex, "Failed to parse OpenAI response for token usage");
+                }
+            }
+        }
+
+        Logger.LogTrace("Left {Name}", nameof(BeforeResponseAsync));
+        return Task.CompletedTask;
+    }
+
+    private bool TryGetOpenAIRequest(string content, out OpenAIRequest? request)
+    {
+        request = null;
+
+        if (string.IsNullOrEmpty(content))
+        {
+            return false;
+        }
+
+        try
+        {
+            Logger.LogDebug("Checking if the request is an OpenAI request...");
+
+            var rawRequest = JsonSerializer.Deserialize<JsonElement>(content, ProxyUtils.JsonSerializerOptions);
+
+            if (rawRequest.TryGetProperty("prompt", out _))
+            {
+                Logger.LogDebug("Request is a completion request");
+                request = JsonSerializer.Deserialize<OpenAICompletionRequest>(content, ProxyUtils.JsonSerializerOptions);
+                return true;
+            }
+
+            if (rawRequest.TryGetProperty("messages", out _))
+            {
+                Logger.LogDebug("Request is a chat completion request");
+                request = JsonSerializer.Deserialize<OpenAIChatCompletionRequest>(content, ProxyUtils.JsonSerializerOptions);
+                return true;
+            }
+
+            Logger.LogDebug("Request is not an OpenAI request.");
+            return false;
+        }
+        catch (JsonException ex)
+        {
+            Logger.LogDebug(ex, "Failed to deserialize OpenAI request.");
+            return false;
+        }
+    }
+
+    private ThrottlingInfo ShouldThrottle(Request request, string throttlingKey)
+    {
+        var throttleKeyForRequest = BuildThrottleKey(request);
+        return new(throttleKeyForRequest == throttlingKey ?
+            (int)(_resetTime - DateTime.Now).TotalSeconds : 0,
+            Configuration.HeaderRetryAfter);
+    }
+
+    private void ThrottleResponse(ProxyRequestArgs e)
+    {
+        var headers = new List<MockResponseHeader>();
+        var body = string.Empty;
+        var request = e.Session.HttpClient.Request;
+
+        // Build standard OpenAI error response for token limit exceeded
+        var openAiError = new
+        {
+            error = new
+            {
+                message = "You exceeded your current quota, please check your plan and billing details.",
+                type = "insufficient_quota",
+                param = (object?)null,
+                code = "insufficient_quota"
+            }
+        };
+        body = JsonSerializer.Serialize(openAiError, ProxyUtils.JsonSerializerOptions);
+
+        headers.Add(new(Configuration.HeaderRetryAfter, ((int)(_resetTime - DateTime.Now).TotalSeconds).ToString(CultureInfo.InvariantCulture)));
+        if (request.Headers.Any(h => h.Name.Equals("Origin", StringComparison.OrdinalIgnoreCase)))
+        {
+            headers.Add(new("Access-Control-Allow-Origin", "*"));
+            headers.Add(new("Access-Control-Expose-Headers", Configuration.HeaderRetryAfter));
+        }
+
+        e.Session.GenericResponse(body, HttpStatusCode.TooManyRequests, [.. headers.Select(h => new HttpHeader(h.Name, h.Value))]);
+    }
+
+    private static string BuildThrottleKey(Request r) => r.RequestUri.Host;
+}
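As a usage sketch, this is roughly how the plugin could be wired up in a devproxyrc.json file. The registration shape follows the usual Dev Proxy plugin convention; the pluginPath, schema URL, and URL to watch are illustrative assumptions, not part of this patch:

{
  "$schema": "https://raw.githubusercontent.com/dotnet/dev-proxy/main/schemas/v1.0.0/rc.schema.json",
  "plugins": [
    {
      "name": "LanguageModelRateLimitingPlugin",
      "enabled": true,
      "pluginPath": "~appFolder/plugins/DevProxy.Plugins.dll",
      "configSection": "languageModelRateLimitingPlugin",
      "urlsToWatch": [
        "https://api.openai.com/*"
      ]
    }
  ],
  "languageModelRateLimitingPlugin": {
    "promptTokenLimit": 5000,
    "completionTokenLimit": 5000,
    "resetTimeWindowSeconds": 60,
    "whenLimitExceeded": "Throttle"
  }
}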
diff --git a/schemas/v1.0.0/languagemodelratelimitingplugin.customresponsefile.schema.json b/schemas/v1.0.0/languagemodelratelimitingplugin.customresponsefile.schema.json
new file mode 100644
index 00000000..cbb6b903
--- /dev/null
+++ b/schemas/v1.0.0/languagemodelratelimitingplugin.customresponsefile.schema.json
@@ -0,0 +1,46 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "Dev Proxy LanguageModelRateLimitingPlugin response",
+  "description": "Custom response for the Dev Proxy LanguageModelRateLimitingPlugin",
+  "type": "object",
+  "properties": {
+    "$schema": {
+      "type": "string",
+      "description": "The URL of the JSON schema used to validate this custom response file."
+    },
+    "body": {
+      "type": [
+        "object",
+        "array",
+        "string"
+      ],
+      "description": "The body of the custom response returned when the token limit is exceeded. Can be an object, array, or string."
+    },
+    "statusCode": {
+      "type": "integer",
+      "description": "HTTP status code to return when the token limit is exceeded (e.g., 429)."
+    },
+    "headers": {
+      "type": "array",
+      "description": "List of headers to include in the custom response.",
+      "items": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string",
+            "description": "Header name."
+          },
+          "value": {
+            "type": "string",
+            "description": "Header value. Use '@dynamic' for the Retry-After header to automatically calculate seconds until reset."
+          }
+        },
+        "required": [
+          "name",
+          "value"
+        ]
+      }
+    }
+  },
+  "additionalProperties": false
+}
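For illustration, here is an example token-limit-response.json that conforms to the schema above. It is a sketch that mirrors the insufficient_quota error the plugin builds for its default throttle response; the schema URL is an assumption based on the file path added in this patch:

{
  "$schema": "https://raw.githubusercontent.com/dotnet/dev-proxy/main/schemas/v1.0.0/languagemodelratelimitingplugin.customresponsefile.schema.json",
  "statusCode": 429,
  "headers": [
    {
      "name": "retry-after",
      "value": "@dynamic"
    }
  ],
  "body": {
    "error": {
      "message": "You exceeded your current quota, please check your plan and billing details.",
      "type": "insufficient_quota",
      "param": null,
      "code": "insufficient_quota"
    }
  }
}

Because the statusCode is 429, the plugin also registers the request in the RetryAfterPlugin throttling data, just as it does for the built-in throttle response.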
diff --git a/schemas/v1.0.0/languagemodelratelimitingplugin.schema.json b/schemas/v1.0.0/languagemodelratelimitingplugin.schema.json
new file mode 100644
index 00000000..1cc836bb
--- /dev/null
+++ b/schemas/v1.0.0/languagemodelratelimitingplugin.schema.json
@@ -0,0 +1,43 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "Dev Proxy LanguageModelRateLimitingPlugin config schema",
+  "type": "object",
+  "properties": {
+    "$schema": {
+      "type": "string",
+      "description": "The URL of the JSON schema used to validate this configuration file."
+    },
+    "headerRetryAfter": {
+      "type": "string",
+      "description": "Name of the response header that communicates the retry-after period (e.g., 'Retry-After')."
+    },
+    "resetTimeWindowSeconds": {
+      "type": "integer",
+      "minimum": 1,
+      "description": "How long in seconds until the next token limit reset."
+    },
+    "promptTokenLimit": {
+      "type": "integer",
+      "minimum": 1,
+      "description": "Number of prompt tokens allowed per time window."
+    },
+    "completionTokenLimit": {
+      "type": "integer",
+      "minimum": 1,
+      "description": "Number of completion tokens allowed per time window."
+    },
+    "whenLimitExceeded": {
+      "type": "string",
+      "enum": [
+        "Throttle",
+        "Custom"
+      ],
+      "description": "Behavior when the token limit is exceeded: 'Throttle' (default throttling) or 'Custom' (custom response)."
+    },
+    "customResponseFile": {
+      "type": "string",
+      "description": "Path to a file containing a custom error response to use when the token limit is exceeded."
+    }
+  },
+  "additionalProperties": false
+}

From f317c9044d531a55a6909b767fa4f28015397d8a Mon Sep 17 00:00:00 2001
From: Waldek Mastykarz
Date: Mon, 14 Jul 2025 11:27:27 +0200
Subject: [PATCH 2/3] Update
 DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
index 3c7f02c3..cb66e639 100644
--- a/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
+++ b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
@@ -190,6 +190,8 @@ public override Task BeforeRequestAsync(ProxyRequestArgs e, CancellationToken ca
                 else
                 {
                     Logger.LogRequest($"Custom behavior not set. {Configuration.CustomResponseFile} not found.", MessageType.Failed, new(e.Session));
+                    e.Session.GenericResponse("Custom response file not found.", HttpStatusCode.InternalServerError, Array.Empty<HttpHeader>());
+                    state.HasBeenSet = true;
                 }
             }
         }

From 2e9974cc12407c9b5a27cc7bd0e66d47eedd7e94 Mon Sep 17 00:00:00 2001
From: Waldek Mastykarz
Date: Mon, 14 Jul 2025 11:27:57 +0200
Subject: [PATCH 3/3] Fix: Update response handling for missing custom
 response file in LanguageModelRateLimitingPlugin

---
 DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
index cb66e639..5dcc2121 100644
--- a/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
+++ b/DevProxy.Plugins/Behavior/LanguageModelRateLimitingPlugin.cs
@@ -190,7 +190,7 @@ public override Task BeforeRequestAsync(ProxyRequestArgs e, CancellationToken ca
         else
         {
             Logger.LogRequest($"Custom behavior not set. {Configuration.CustomResponseFile} not found.", MessageType.Failed, new(e.Session));
-            e.Session.GenericResponse("Custom response file not found.", HttpStatusCode.InternalServerError, Array.Empty<HttpHeader>());
+            e.Session.GenericResponse("Custom response file not found.", HttpStatusCode.InternalServerError, []);
             state.HasBeenSet = true;
         }
     }
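For completeness, a sketch of a matching languageModelRateLimitingPlugin config section that opts into the custom response behavior. Property names come from the config schema above; the values are illustrative:

{
  "$schema": "https://raw.githubusercontent.com/dotnet/dev-proxy/main/schemas/v1.0.0/languagemodelratelimitingplugin.schema.json",
  "headerRetryAfter": "retry-after",
  "resetTimeWindowSeconds": 60,
  "promptTokenLimit": 5000,
  "completionTokenLimit": 5000,
  "whenLimitExceeded": "Custom",
  "customResponseFile": "token-limit-response.json"
}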