@@ -149,18 +149,27 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
149
149
// Decode each request's message content
150
150
imgIndex , vidIndex , audioIndex := 0 , 0 , 0
151
151
for i , m := range input .Messages {
152
+ nrOfImgsInMessage := 0
153
+ nrOfVideosInMessage := 0
154
+ nrOfAudiosInMessage := 0
155
+
152
156
switch content := m .Content .(type ) {
153
157
case string :
154
158
input .Messages [i ].StringContent = content
155
159
case []interface {}:
156
160
dat , _ := json .Marshal (content )
157
161
c := []schema.Content {}
158
162
json .Unmarshal (dat , & c )
163
+
164
+ textContent := ""
165
+ // we will template this at the end
166
+
159
167
CONTENT:
160
168
for _ , pp := range c {
161
169
switch pp .Type {
162
170
case "text" :
163
- input .Messages [i ].StringContent = pp .Text
171
+ textContent += pp .Text
172
+ //input.Messages[i].StringContent = pp.Text
164
173
case "video" , "video_url" :
165
174
// Decode content as base64 either if it's an URL or base64 text
166
175
base64 , err := utils .GetContentURIAsBase64 (pp .VideoURL .URL )
@@ -169,14 +178,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
169
178
continue CONTENT
170
179
}
171
180
input .Messages [i ].StringVideos = append (input .Messages [i ].StringVideos , base64 ) // TODO: make sure that we only return base64 stuff
172
-
173
- t := "[vid-{{.ID}}]{{.Text}}"
174
- if config .TemplateConfig .Video != "" {
175
- t = config .TemplateConfig .Video
176
- }
177
- // set a placeholder for each image
178
- input .Messages [i ].StringContent , _ = templates .TemplateMultiModal (t , vidIndex , input .Messages [i ].StringContent )
179
181
vidIndex ++
182
+ nrOfVideosInMessage ++
180
183
case "audio_url" , "audio" :
181
184
// Decode content as base64 either if it's an URL or base64 text
182
185
base64 , err := utils .GetContentURIAsBase64 (pp .AudioURL .URL )
@@ -185,13 +188,8 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
185
188
continue CONTENT
186
189
}
187
190
input .Messages [i ].StringAudios = append (input .Messages [i ].StringAudios , base64 ) // TODO: make sure that we only return base64 stuff
188
- // set a placeholder for each image
189
- t := "[audio-{{.ID}}]{{.Text}}"
190
- if config .TemplateConfig .Audio != "" {
191
- t = config .TemplateConfig .Audio
192
- }
193
- input .Messages [i ].StringContent , _ = templates .TemplateMultiModal (t , audioIndex , input .Messages [i ].StringContent )
194
191
audioIndex ++
192
+ nrOfAudiosInMessage ++
195
193
case "image_url" , "image" :
196
194
// Decode content as base64 either if it's an URL or base64 text
197
195
base64 , err := utils .GetContentURIAsBase64 (pp .ImageURL .URL )
@@ -200,16 +198,21 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
200
198
continue CONTENT
201
199
}
202
200
203
- t := "[img-{{.ID}}]{{.Text}}"
204
- if config .TemplateConfig .Image != "" {
205
- t = config .TemplateConfig .Image
206
- }
207
201
input .Messages [i ].StringImages = append (input .Messages [i ].StringImages , base64 ) // TODO: make sure that we only return base64 stuff
208
- // set a placeholder for each image
209
- input .Messages [i ].StringContent , _ = templates .TemplateMultiModal (t , imgIndex , input .Messages [i ].StringContent )
202
+
210
203
imgIndex ++
204
+ nrOfImgsInMessage ++
211
205
}
212
206
}
207
+
208
+ input .Messages [i ].StringContent , _ = templates .TemplateMultiModal (config .TemplateConfig .Multimodal , templates.MultiModalOptions {
209
+ TotalImages : imgIndex ,
210
+ TotalVideos : vidIndex ,
211
+ TotalAudios : audioIndex ,
212
+ ImagesInMessage : nrOfImgsInMessage ,
213
+ VideosInMessage : nrOfVideosInMessage ,
214
+ AudiosInMessage : nrOfAudiosInMessage ,
215
+ }, textContent )
213
216
}
214
217
}
215
218
0 commit comments