mudler
diff --git a/‎api/backend/llm.go
Lines changed: 2 additions & 1 deletion b/‎api/backend/llm.go
Lines changed: 2 additions & 1 deletion
diff --git a/‎api/backend/options.go
Lines changed: 1 addition & 0 deletions b/‎api/backend/options.go
Lines changed: 1 addition & 0 deletions
diff --git a/‎api/config/config.go
Lines changed: 1 addition & 0 deletions b/‎api/config/config.go
Lines changed: 1 addition & 0 deletions
diff --git a/‎api/openai/chat.go
Lines changed: 13 additions & 5 deletions b/‎api/openai/chat.go
Lines changed: 13 additions & 5 deletions
diff --git a/‎api/openai/completion.go
Lines changed: 5 additions & 0 deletions b/‎api/openai/completion.go
Lines changed: 5 additions & 0 deletions
diff --git a/‎api/openai/inference.go
Lines changed: 6 additions & 1 deletion b/‎api/openai/inference.go
Lines changed: 6 additions & 1 deletion
diff --git a/‎api/openai/request.go
Lines changed: 63 additions & 0 deletions b/‎api/openai/request.go
Lines changed: 63 additions & 0 deletions
diff --git a/‎api/schema/openai.go
Lines changed: 11 additions & 1 deletion b/‎api/schema/openai.go
Lines changed: 11 additions & 1 deletion
diff --git a/‎backend/cpp/llama/grpc-server.cpp
Lines changed: 21 additions & 16 deletions b/‎backend/cpp/llama/grpc-server.cpp
Lines changed: 21 additions & 16 deletions
@@ -26,7 +26,7 @@ type TokenUsage struct {
 	Completion int
 }
 
-func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model
 
 	grpcOpts := gRPCModelOpts(c)
@@ -72,6 +72,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
 	fn := func() (LLMResponse, error) {
 		opts := gRPCPredictOpts(c, loader.ModelPath)
 		opts.Prompt = s
+		opts.Images = images
 
 		tokenUsage := TokenUsage{}
 
 
@@ -45,6 +45,7 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 		DraftModel:    c.DraftModel,
 		AudioPath:     c.VallE.AudioPath,
 		Quantization:  c.Quantization,
+		MMProj:        c.MMProj,
 		LoraAdapter:   c.LoraAdapter,
 		LoraBase:      c.LoraBase,
 		NGQA:          c.NGQA,
 
@@ -104,6 +104,7 @@ type LLMConfig struct {
 	DraftModel      string   `yaml:"draft_model"`
 	NDraft          int32    `yaml:"n_draft"`
 	Quantization    string   `yaml:"quantization"`
+	MMProj          string   `yaml:"mmproj"`
 }
 
 type AutoGPTQ struct {
 
@@ -81,6 +81,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 			noActionDescription = config.FunctionsConfig.NoActionDescriptionName
 		}
 
+		if input.ResponseFormat == "json_object" {
+			input.Grammar = grammar.JSONBNF
+		}
+
 		// process functions if we have any defined or if we have a function call string
 		if len(input.Functions) > 0 && config.ShouldUseFunctions() {
 			log.Debug().Msgf("Response needs to process functions")
@@ -140,14 +144,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 				}
 			}
 			r := config.Roles[role]
-			contentExists := i.Content != nil && *i.Content != ""
+			contentExists := i.Content != nil && i.StringContent != ""
 			// First attempt to populate content via a chat message specific template
 			if config.TemplateConfig.ChatMessage != "" {
 				chatMessageData := model.ChatMessageTemplateData{
 					SystemPrompt: config.SystemPrompt,
 					Role:         r,
 					RoleName:     role,
-					Content:      *i.Content,
+					Content:      i.StringContent,
 					MessageIndex: messageIndex,
 				}
 				templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
@@ -166,7 +170,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 			if content == "" {
 				if r != "" {
 					if contentExists {
-						content = fmt.Sprint(r, " ", *i.Content)
+						content = fmt.Sprint(r, " ", i.StringContent)
 					}
 					if i.FunctionCall != nil {
 						j, err := json.Marshal(i.FunctionCall)
@@ -180,7 +184,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 					}
 				} else {
 					if contentExists {
-						content = fmt.Sprint(*i.Content)
+						content = fmt.Sprint(i.StringContent)
 					}
 					if i.FunctionCall != nil {
 						j, err := json.Marshal(i.FunctionCall)
@@ -334,7 +338,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 					// Otherwise ask the LLM to understand the JSON output and the context, and return a message
 					// Note: This costs (in term of CPU) another computation
 					config.Grammar = ""
-					predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
+					images := []string{}
+					for _, m := range input.Messages {
+						images = append(images, m.StringImages...)
+					}
+					predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
 					if err != nil {
 						log.Error().Msgf("inference error: %s", err.Error())
 						return
 
@@ -12,6 +12,7 @@ import (
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/api/schema"
+	"github.com/go-skynet/LocalAI/pkg/grammar"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
@@ -64,6 +65,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
+		if input.ResponseFormat == "json_object" {
+			input.Grammar = grammar.JSONBNF
+		}
+
 		log.Debug().Msgf("Parameter Config: %+v", config)
 
 		if input.Stream {
 
@@ -23,8 +23,13 @@ func ComputeChoices(
 		n = 1
 	}
 
+	images := []string{}
+	for _, m := range req.Messages {
+		images = append(images, m.StringImages...)
+	}
+
 	// get the model function to call for the result
-	predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
+	predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
 	if err != nil {
 		return result, backend.TokenUsage{}, err
 	}
 
@@ -2,8 +2,11 @@ package openai
 
 import (
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"io/ioutil"
+	"net/http"
 	"os"
 	"path/filepath"
 	"strings"
@@ -61,6 +64,37 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
 	return modelFile, input, nil
 }
 
+// getBase64Image returns the base64 payload of the image referenced by s.
+// An http(s) URL is downloaded into memory and encoded; a base64 data URI of
+// any media type has its "data:<type>;base64," header dropped. Anything else,
+// a transport failure, or a non-200 response is an error.
+// NOTE(review): s is request-supplied and fetched without timeout or size cap.
+func getBase64Image(s string) (string, error) {
+	if strings.HasPrefix(s, "http://") || strings.HasPrefix(s, "https://") {
+		resp, err := http.Get(s)
+		if err != nil {
+			return "", err
+		}
+		defer resp.Body.Close()
+
+		// An error page must not be passed on as if it were image data.
+		if resp.StatusCode != http.StatusOK {
+			return "", fmt.Errorf("downloading image: unexpected status %s", resp.Status)
+		}
+		data, err := ioutil.ReadAll(resp.Body)
+		if err != nil {
+			return "", err
+		}
+		return base64.StdEncoding.EncodeToString(data), nil
+	}
+
+	// Data URIs: accept any media type (png, webp, ...), not only jpeg.
+	if i := strings.Index(s, ";base64,"); strings.HasPrefix(s, "data:") && i >= 0 {
+		return s[i+len(";base64,"):], nil
+	}
+	return "", fmt.Errorf("not valid string")
+}
+
 func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
@@ -129,6 +163,35 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
 		}
 	}
 
+	// Decode each request's message content
+	index := 0
+	for _, m := range input.Messages {
+		switch content := m.Content.(type) {
+		case string:
+			m.StringContent = content
+		case []interface{}:
+			dat, _ := json.Marshal(content)
+			c := []schema.Content{}
+			json.Unmarshal(dat, &c)
+			for _, pp := range c {
+				if pp.Type == "text" {
+					m.StringContent = pp.Text
+				} else if pp.Type == "image_url" {
+					// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
+					base64, err := getBase64Image(pp.ImageURL)
+					if err == nil {
+						m.StringImages = append(m.StringImages, base64) // TODO: make sure that we only return base64 stuff
+						// set a placeholder for each image
+						m.StringContent = m.StringContent + fmt.Sprintf("[img-%d]", index)
+						index++
+					} else {
+						fmt.Print("Failed encoding image", err)
+					}
+				}
+			}
+		}
+	}
+
 	if input.RepeatPenalty != 0 {
 		config.RepeatPenalty = input.RepeatPenalty
 	}
 
@@ -55,11 +55,21 @@ type Choice struct {
 	Text         string   `json:"text,omitempty"`
 }
 
+type Content struct { // one typed part of a multi-part message content
+	Type     string `json:"type" yaml:"type"` // part kind; the request decoder handles "text" and "image_url"
+	Text     string `json:"text" yaml:"text"` // text payload, read for "text" parts
+	ImageURL string `json:"image_url" yaml:"image_url"` // image reference, read for "image_url" parts. NOTE(review): decoded as a plain string; OpenAI-style clients presumably send image_url as an object {"url": ...} — confirm
+}
+
 type Message struct {
 	// The message role
 	Role string `json:"role,omitempty" yaml:"role"`
 	// The message content
-	Content *string `json:"content" yaml:"content"`
+	Content interface{} `json:"content" yaml:"content"` // raw decoded content: a string or a list of typed parts
+
+	StringContent string   `json:"string_content,omitempty" yaml:"string_content,omitempty"` // text derived from Content; each accepted image part adds an "[img-N]" placeholder
+	StringImages  []string `json:"string_images,omitempty" yaml:"string_images,omitempty"` // base64 image payloads extracted from Content
+
 	// A result of a function call
 	FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
 }
 
@@ -1,9 +1,9 @@
 // llama.cpp gRPC C++ backend server
 //
-// Ettore Di Giacinto <[email protected]>
+// Ettore Di Giacinto <[email protected]> and llama.cpp authors
 //
 // This is a gRPC server for llama.cpp compatible with the LocalAI proto
-// Note: this is a re-adaptation of the original llama.cpp example/server.cpp for HTTP, 
+// Note: this is a re-adaptation of the original llama.cpp example/server.cpp for HTTP (https://github.com/ggerganov/llama.cpp/tree/master/examples/server), 
 // but modified to work with gRPC
 //
 
@@ -39,7 +39,7 @@ using grpc::Status;
 using backend::HealthMessage;
 
 
-///// LLAMA.CPP server
+///// LLAMA.CPP server code below
 
 using json = nlohmann::json;
 
@@ -1809,7 +1809,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
 
 /////////////////////////////////
 ////////////////////////////////
-//////// LOCALAI
+//////// LOCALAI code starts below here
+/////////////////////////////////
+////////////////////////////////
 
 bool loaded_model; // TODO: add a mutex for this, but happens only once loading the model
 
@@ -1880,6 +1882,16 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["prompt"] = predict->prompt();
     data["ignore_eos"] = predict->ignoreeos();
 
+    // for each image in the request, add the image data
+    //
+    for (int i = 0; i < predict->images_size(); i++) {
+        data["image_data"].push_back(json
+            {
+                {"id", i},
+                {"data",    predict->images(i)},
+            });
+    }
+
     data["stop"] = predict->stopprompts();
     // data["n_probs"] = predict->nprobs();
     //TODO: images,
@@ -1953,14 +1965,17 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 //     }
 // }
 
-
-
 static void params_parse(const backend::ModelOptions* request,
                                 gpt_params & params) {
 
     // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
 
     params.model = request->modelfile();
+    if (!request->mmproj().empty()) {
+    // get the directory of modelfile
+      std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
+      params.mmproj = model_dir + request->mmproj();
+    }
     //  params.model_alias ??
     params.model_alias =  request->modelfile();
     params.n_ctx = request->contextsize();
@@ -2071,16 +2086,6 @@ class BackendServiceImpl final : public backend::Backend::Service {
                 break;
             }
         }
-        return grpc::Status::OK;
-                   
-
-                    // auto on_complete = [task_id, &llama] (bool)
-                    // {
-                    //     // cancel
-                    //     llama.request_cancel(task_id);
-                    // };
-
- 
 
         return grpc::Status::OK;
     }
Original file line number	Diff line number	Diff line change
`@@ -104,6 +104,7 @@ type LLMConfig struct {`
`104`	`104`	DraftModel string `yaml:"draft_model"`
`105`	`105`	NDraft int32 `yaml:"n_draft"`
`106`	`106`	Quantization string `yaml:"quantization"`
	`107`	+ MMProj string `yaml:"mmproj"`
`107`	`108`	`}`
`108`	`109`
`109`	`110`	`type AutoGPTQ struct {`
Original file line number	Diff line number	Diff line change
`@@ -23,8 +23,13 @@ func ComputeChoices(`
`23`	`23`	`n = 1`
`24`	`24`	`}`
`25`	`25`
	`26`	`+ images := []string{}`
	`27`	`+ for _, m := range req.Messages {`
	`28`	`+ images = append(images, m.StringImages...)`
	`29`	`+ }`
	`30`	`+`
`26`	`31`	`// get the model function to call for the result`
`27`		`- predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)`
	`32`	`+ predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)`
`28`	`33`	`if err != nil {`
`29`	`34`	`return result, backend.TokenUsage{}, err`
`30`	`35`	`}`