Skip to content

Commit 22ce231

Browse files
authored
Merge pull request #1167 from iceljc/features/refine-model-settings
Features/refine model settings
2 parents 29c3fa5 + d1a7799 commit 22ce231

File tree

23 files changed

+793
-781
lines changed

23 files changed

+793
-781
lines changed

BotSharp.sln

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Test.RealtimeVoice
145145
EndProject
146146
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ChartHandler", "src\Plugins\BotSharp.Plugin.ChartHandler\BotSharp.Plugin.ChartHandler.csproj", "{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}"
147147
EndProject
148+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ExcelHandler", "src\Plugins\BotSharp.Plugin.ExcelHandler\BotSharp.Plugin.ExcelHandler.csproj", "{FC63C875-E880-D8BB-B8B5-978AB7B62983}"
149+
EndProject
148150
Global
149151
GlobalSection(SolutionConfigurationPlatforms) = preSolution
150152
Debug|Any CPU = Debug|Any CPU
@@ -609,6 +611,14 @@ Global
609611
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}.Release|Any CPU.Build.0 = Release|Any CPU
610612
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}.Release|x64.ActiveCfg = Release|Any CPU
611613
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}.Release|x64.Build.0 = Release|Any CPU
614+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
615+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|Any CPU.Build.0 = Debug|Any CPU
616+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|x64.ActiveCfg = Debug|Any CPU
617+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|x64.Build.0 = Debug|Any CPU
618+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|Any CPU.ActiveCfg = Release|Any CPU
619+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|Any CPU.Build.0 = Release|Any CPU
620+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|x64.ActiveCfg = Release|Any CPU
621+
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|x64.Build.0 = Release|Any CPU
612622
EndGlobalSection
613623
GlobalSection(SolutionProperties) = preSolution
614624
HideSolutionNode = FALSE
@@ -679,6 +689,7 @@ Global
679689
{7C0C7D13-D161-4AB0-9C29-83A0F1FF990E} = {32FAFFFE-A4CB-4FEE-BF7C-84518BBC6DCC}
680690
{B067B126-88CD-4282-BEEF-7369B64423EF} = {32FAFFFE-A4CB-4FEE-BF7C-84518BBC6DCC}
681691
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
692+
{FC63C875-E880-D8BB-B8B5-978AB7B62983} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
682693
EndGlobalSection
683694
GlobalSection(ExtensibilityGlobals) = postSolution
684695
SolutionGuid = {A9969D89-C98B-40A5-A12B-FC87E55B3A19}

src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ public class LlmModelSetting
6262
/// </summary>
6363
public ImageSetting? Image { get; set; }
6464

65+
/// <summary>
66+
/// Settings for audio
67+
/// </summary>
68+
public AudioSetting? Audio { get; set; }
69+
6570
/// <summary>
6671
/// Settings for llm cost
6772
/// </summary>
@@ -128,6 +133,20 @@ public class ImageVariationSetting
128133
}
129134
#endregion
130135

136+
#region Audio model settings
137+
/// <summary>
/// Audio-related settings of an LLM model.
/// </summary>
public class AudioSetting
{
    /// <summary>
    /// Settings that apply to audio transcription; null when not configured.
    /// </summary>
    public AudioTranscriptionSetting? Transcription { get; set; }
}
141+
142+
/// <summary>
/// Transcription-specific audio settings of an LLM model.
/// </summary>
public class AudioTranscriptionSetting
{
    /// <summary>
    /// Optional temperature value for transcription.
    /// NOTE(review): presumably forwarded to the provider as the sampling temperature; confirm against the consumer.
    /// </summary>
    public float? Temperature { get; set; }

    /// <summary>
    /// Optional response format setting.
    /// </summary>
    public ModelSettingBase? ResponseFormat { get; set; }

    /// <summary>
    /// Optional granularity setting.
    /// NOTE(review): presumably the timestamp granularity of the transcript; confirm against the consumer.
    /// </summary>
    public ModelSettingBase? Granularity { get; set; }
}
148+
#endregion
149+
131150
public class ModelSettingBase
132151
{
133152
public string? Default { get; set; }

src/Plugins/BotSharp.Plugin.AudioHandler/Functions/HandleAudioRequestFn.cs

Lines changed: 0 additions & 135 deletions
This file was deleted.
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
namespace BotSharp.Plugin.AudioHandler.Functions;
2+
3+
/// <summary>
/// Function callback that transcribes the audio files attached by the user in the
/// current conversation and writes the resulting transcript into the message content.
/// </summary>
public class ReadAudioFn : IFunctionCallback
{
    public string Name => "util-audio-handle_audio_request";
    public string Indication => "Reading audio";

    private readonly IServiceProvider _services;
    private readonly IFileStorageService _fileStorage;
    private readonly ILogger<ReadAudioFn> _logger;
    private readonly BotSharpOptions _options;
    private readonly AudioHandlerSettings _settings;

    // Content types requested from file storage when looking up message files.
    private readonly IEnumerable<string> _audioContentTypes = new List<string>
    {
        AudioType.mp3.ToFileType(),
        AudioType.wav.ToFileType(),
    };

    public ReadAudioFn(
        IServiceProvider services,
        ILogger<ReadAudioFn> logger,
        BotSharpOptions options,
        AudioHandlerSettings settings,
        IFileStorageService fileStorage)
    {
        _services = services;
        _logger = logger;
        _options = options;
        _settings = settings;
        _fileStorage = fileStorage;
    }

    /// <summary>
    /// Collects the audio files of the conversation, transcribes the files of the latest
    /// dialog that has any, and stores the transcript in <paramref name="message"/>.
    /// </summary>
    /// <param name="message">The function call message; its <c>Content</c> is overwritten with the transcript.</param>
    /// <returns>Always <c>true</c>.</returns>
    public async Task<bool> Execute(RoleDialogModel message)
    {
        // The deserialized arguments are not used below; the call is kept so that
        // malformed function arguments still fail here exactly as before.
        var args = JsonSerializer.Deserialize<LlmContextIn>(message.FunctionArgs, _options.JsonSerializerOptions);
        var conv = _services.GetRequiredService<IConversationService>();
        var routingCtx = _services.GetRequiredService<IRoutingContext>();

        // Prefer the dialogs held by the routing context; fall back to the stored history.
        var wholeDialogs = routingCtx.GetDialogs();
        if (wholeDialogs.IsNullOrEmpty())
        {
            wholeDialogs = conv.GetDialogHistory();
        }

        var dialogs = AssembleFiles(conv.ConversationId, wholeDialogs);
        var response = await GetAudioTranscription(dialogs);
        message.Content = response;

        // Detach the files again so they do not stay on the dialog objects.
        dialogs.ForEach(x => x.Files = null);
        return true;
    }

    /// <summary>
    /// Attaches the user-uploaded audio files found in file storage to the user dialogs
    /// they belong to (matched by message id).
    /// </summary>
    /// <param name="convId">Conversation id used for the file lookup.</param>
    /// <param name="dialogs">Dialogs to decorate; the same list instance is returned.</param>
    /// <returns>The input dialogs, or an empty list when there are none.</returns>
    private List<RoleDialogModel> AssembleFiles(string convId, List<RoleDialogModel> dialogs)
    {
        if (dialogs.IsNullOrEmpty())
        {
            return new List<RoleDialogModel>();
        }

        var messageId = dialogs.Select(x => x.MessageId).Distinct().ToList();
        var audioFiles = _fileStorage.GetMessageFiles(convId, messageId, options: new()
        {
            Sources = [FileSource.User],
            ContentTypes = _audioContentTypes
        });

        foreach (var dialog in dialogs)
        {
            var found = audioFiles.Where(x => x.MessageId == dialog.MessageId
                                           && x.FileSource.IsEqualTo(FileSource.User)).ToList();

            if (found.IsNullOrEmpty() || !dialog.IsFromUser)
            {
                continue;
            }

            dialog.Files = found.Select(x => new BotSharpFile
            {
                ContentType = x.ContentType,
                FileUrl = x.FileUrl,
                FileStorageUrl = x.FileStorageUrl
            }).ToList();
        }

        return dialogs;
    }

    /// <summary>
    /// Transcribes every audio file of the last dialog that carries files.
    /// </summary>
    /// <param name="dialogs">Dialogs whose <c>Files</c> were populated beforehand.</param>
    /// <returns>
    /// The transcripts joined by blank lines, or a "no audio" notice when nothing was transcribed.
    /// </returns>
    private async Task<string> GetAudioTranscription(List<RoleDialogModel> dialogs)
    {
        var audioCompletion = PrepareModel();
        var dialog = dialogs.LastOrDefault(x => !x.Files.IsNullOrEmpty());
        var transcripts = new List<string>();

        if (dialog != null)
        {
            foreach (var file in dialog.Files)
            {
                if (string.IsNullOrWhiteSpace(file?.FileStorageUrl))
                {
                    continue;
                }

                var fileName = Path.GetFileName(file.FileStorageUrl);
                if (!VerifyAudioFileType(fileName))
                {
                    continue;
                }

                var binary = _fileStorage.GetFileBytes(file.FileStorageUrl);

                // The using block disposes the stream; no explicit Close() is needed.
                using (var stream = binary.ToStream())
                {
                    stream.Position = 0;

                    var result = await audioCompletion.TranscriptTextAsync(stream, fileName);
                    transcripts.Add(result);
                }

                // Short pause between files. NOTE(review): presumably throttles provider calls; confirm.
                await Task.Delay(100);
            }
        }

        if (transcripts.IsNullOrEmpty())
        {
            var msg = "No audio is found in the chat.";
            _logger.LogWarning(msg);
            transcripts.Add(msg);
        }

        return string.Join("\r\n\r\n", transcripts);
    }

    /// <summary>
    /// Resolves the audio transcriber for the configured provider and model.
    /// </summary>
    private IAudioTranscription PrepareModel()
    {
        var (provider, model) = GetLlmProviderModel();
        return CompletionProvider.GetAudioTranscriber(_services, provider: provider, model: model);
    }

    /// <summary>
    /// Checks whether the file name has an extension that parses as an <c>AudioType</c>,
    /// or a content type known to <c>FileUtility</c>.
    /// </summary>
    private bool VerifyAudioFileType(string fileName)
    {
        // Invariant lowering keeps the extension match independent of the current culture.
        var extension = Path.GetExtension(fileName).TrimStart('.').ToLowerInvariant();
        return Enum.TryParse<AudioType>(extension, out _)
            || !string.IsNullOrEmpty(FileUtility.GetFileContentType(fileName));
    }

    /// <summary>
    /// Picks the provider/model pair in priority order: conversation state, then plugin
    /// settings, then the built-in default.
    /// </summary>
    /// <returns>A (provider, model) tuple; never empty.</returns>
    private (string, string) GetLlmProviderModel()
    {
        var state = _services.GetRequiredService<IConversationStateService>();

        // Fix: the model was previously read from the provider key, so a state override
        // returned the provider name as the model.
        // NOTE(review): "audio_read_llm_model" follows the provider key's naming; confirm with whoever sets the state.
        var provider = state.GetState("audio_read_llm_provider");
        var model = state.GetState("audio_read_llm_model");

        if (!string.IsNullOrEmpty(provider) && !string.IsNullOrEmpty(model))
        {
            return (provider, model);
        }

        provider = _settings?.Audio?.Reading?.LlmProvider;
        model = _settings?.Audio?.Reading?.LlmModel;

        if (!string.IsNullOrEmpty(provider) && !string.IsNullOrEmpty(model))
        {
            return (provider, model);
        }

        provider = "openai";
        model = "gpt-4o-mini-transcribe";

        return (provider, model);
    }
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
1+
using BotSharp.Abstraction.Models;
2+
13
namespace BotSharp.Plugin.AudioHandler.Settings;
24

35
/// <summary>
/// Root settings object of the audio handler plugin.
/// </summary>
public class AudioHandlerSettings
{
    /// <summary>
    /// Audio settings; null when not configured.
    /// </summary>
    public AudioSettings? Audio { get; set; }
}
9+
10+
#region Audio
11+
/// <summary>
/// Groups the audio-related settings of the plugin.
/// </summary>
public class AudioSettings
{
    /// <summary>
    /// Settings used when reading audio; null when not configured.
    /// </summary>
    public AudioReadSettings? Reading { get; set; }
}
15+
16+
/// <summary>
/// Settings for reading audio. Declares no members of its own; everything is
/// inherited from <see cref="LlmBase"/>.
/// NOTE(review): presumably the LLM provider and model names come from the base type; confirm in LlmBase.
/// </summary>
public class AudioReadSettings : LlmBase
{
}
19+
#endregion

0 commit comments

Comments
 (0)