feat(alias): alias llama to llama-cpp, update docs #1448

Merged 1 commit on Dec 16, 2023

docs/content/model-compatibility/_index.en.md (2 additions, 0 deletions)

```diff
@@ -50,6 +50,8 @@ Besides llama based models, LocalAI is compatible also with other architectures.
 | `diffusers` | SD,... | no | Image generation | no | no | N/A |
 | `vall-e-x` | Vall-E | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
 | `vllm` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA |
+| `exllama2` | GPTQ | yes | GPT only | no | no | N/A |
+| `transformers-musicgen` | | no | Audio generation | no | no | N/A |
 
 Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "advanced" %}})).
```
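
With these new entries, any backend from the table can be pinned explicitly via the `backend` field of a model YAML file. A minimal sketch for the new `exllama2` entry, assuming a GPTQ model; the model name and file below are placeholders, not part of this PR:

```yaml
name: my-gptq-model
backend: exllama2
parameters:
  # Relative to the models path; placeholder file name
  model: model.safetensors
```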

docs/content/model-compatibility/llama-cpp.md (3 additions, 3 deletions)

```diff
@@ -9,7 +9,7 @@ weight = 1
 
 {{% notice note %}}
 
-The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify the `llama-stable` backend instead. If you are relying on automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend.
+The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify the `llama-ggml` backend instead. If you are relying on automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend. The go backend is deprecated as well, but is still available as `go-llama`; it still supports features not available in the mainline backend: speculative sampling and embeddings.
 
 {{% /notice %}}
```

````diff
@@ -65,11 +65,11 @@ parameters:
 
 In the example above we specify `llama` as the backend to restrict loading to `gguf` models only.
 
-For instance, to use the `llama-stable` backend for `ggml` models:
+For instance, to use the `llama-ggml` backend for `ggml` models:
 
 ```yaml
 name: llama
-backend: llama-stable
+backend: llama-ggml
 parameters:
   # Relative to the models path
   model: file.ggml.bin
````
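
The `llama`/`gguf` example that the text above refers to is collapsed in this diff. A minimal sketch of what such a configuration looks like, following the same pattern as the docs; the file name is a placeholder:

```yaml
name: llama
backend: llama
parameters:
  # Relative to the models path; placeholder file name
  model: file.gguf
```
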
pkg/model/initializers.go (10 additions, 1 deletion)

```diff
@@ -14,6 +14,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+var Aliases map[string]string = map[string]string{
+	"go-llama": GoLlamaBackend,
+	"llama":    LLamaCPP,
+}
+
 const (
 	GoLlamaBackend = "llama"
 	LlamaGGML      = "llama-ggml"
@@ -169,9 +174,13 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
 func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
 	o := NewOptions(opts...)
 
-	log.Debug().Msgf("Loading model %s from %s", o.backendString, o.model)
+	log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
 
 	backend := strings.ToLower(o.backendString)
+	if realBackend, exists := Aliases[backend]; exists {
+		log.Debug().Msgf("%s is an alias of %s", backend, realBackend)
+		backend = realBackend
+	}
 
 	if o.singleActiveBackend {
 		ml.mu.Lock()
```
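
With the alias table in place, a configuration that sets `backend: llama` now resolves to the `llama-cpp` backend, while the deprecated go backend stays reachable as `go-llama`. A minimal sketch keeping a model pinned to the go backend; the model name and file are placeholders:

```yaml
name: llama-go
# `go-llama` is resolved through the Aliases map to the deprecated go backend
backend: go-llama
parameters:
  # Relative to the models path; placeholder file name
  model: file.gguf
```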