2 changes: 1 addition & 1 deletion backend/go/huggingface/langchain.go
@@ -1,6 +1,6 @@
package main

-// This is a wrapper to statisfy the GRPC service interface
+// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
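The comment corrected here (and in several sibling backends below) describes the wrapper pattern these files share: a small struct embeds a base type so it satisfies the gRPC service interface while overriding only the calls its backend supports. A self-contained sketch of that pattern, using illustrative interface and type names rather than LocalAI's actual API:

```go
package main

import "fmt"

// Backend stands in for the gRPC service interface a backend wrapper must satisfy.
type Backend interface {
	Load(modelPath string) error
	Predict(prompt string) (string, error)
}

// base supplies default implementations, so a concrete wrapper only
// overrides the calls its backend actually supports.
type base struct{}

func (base) Load(string) error              { return nil }
func (base) Predict(string) (string, error) { return "", fmt.Errorf("not implemented") }

// LLM embeds base and therefore satisfies Backend, overriding only Load.
type LLM struct {
	base
	modelPath string
}

func (l *LLM) Load(modelPath string) error {
	l.modelPath = modelPath // a real wrapper would initialize the underlying library here
	return nil
}

func main() {
	var b Backend = &LLM{}
	_ = b.Load("model.gguf")
	_, err := b.Predict("hello")
	fmt.Println(err) // "not implemented": only Load is overridden in this sketch
}
```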
4 changes: 2 additions & 2 deletions backend/go/llm/llama/llama.go
@@ -1,6 +1,6 @@
package main

-// This is a wrapper to statisfy the GRPC service interface
+// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
@@ -79,7 +79,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
}

if opts.LowVRAM {
-llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
+llamaOpts = append(llamaOpts, llama.EnableLowVRAM)
}

if opts.DraftModel != "" {
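The corrected `llama.EnableLowVRAM` identifier is appended to a slice of loader options built up from the request, a common functional-options pattern. A rough, self-contained sketch of that pattern; the option and field names below are illustrative, not the go-llama.cpp API:

```go
package main

import "fmt"

type modelConfig struct {
	lowVRAM    bool
	draftModel string
}

// ModelOption mirrors a functional option accepted by a hypothetical loader.
type ModelOption func(cfg *modelConfig)

// EnableLowVRAM is the kind of predefined option the fixed line appends.
var EnableLowVRAM ModelOption = func(cfg *modelConfig) { cfg.lowVRAM = true }

// WithDraftModel shows an option that carries a value.
func WithDraftModel(path string) ModelOption {
	return func(cfg *modelConfig) { cfg.draftModel = path }
}

// loadModel applies the accumulated options, in the spirit of a loader that
// accepts variadic options.
func loadModel(opts ...ModelOption) modelConfig {
	cfg := modelConfig{}
	for _, o := range opts {
		o(&cfg)
	}
	return cfg
}

func main() {
	// Translate request fields (hard-coded here) into options, then load.
	lowVRAM, draft := true, "draft.gguf"
	var opts []ModelOption
	if lowVRAM {
		opts = append(opts, EnableLowVRAM)
	}
	if draft != "" {
		opts = append(opts, WithDraftModel(draft))
	}
	fmt.Printf("%+v\n", loadModel(opts...))
}
```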
2 changes: 1 addition & 1 deletion backend/go/local-store/store.go
@@ -1,6 +1,6 @@
package main

-// This is a wrapper to statisfy the GRPC service interface
+// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"container/heap"
2 changes: 1 addition & 1 deletion backend/go/piper/piper.go
@@ -1,6 +1,6 @@
package main

-// This is a wrapper to statisfy the GRPC service interface
+// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
2 changes: 1 addition & 1 deletion backend/go/silero-vad/vad.go
@@ -1,6 +1,6 @@
package main

-// This is a wrapper to statisfy the GRPC service interface
+// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
2 changes: 1 addition & 1 deletion backend/go/whisper/gowhisper.go
@@ -72,7 +72,7 @@ func (w *Whisper) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
}, nil
}

-// unsafeptr warning is caused by segsPtr being on the stack and therefor being subject to stack copying AFAICT
+// unsafeptr warning is caused by segsPtr being on the stack and therefore being subject to stack copying AFAICT
// however the stack shouldn't have grown between setting segsPtr and now, also the memory pointed to is allocated by C++
segs := unsafe.Slice((*float32)(unsafe.Pointer(segsPtr)), segsLen)

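For context on the comment fixed here: `unsafe.Slice` builds a Go slice header from a raw pointer and a length, which is how the segment buffer allocated on the C++ side is viewed as `[]float32`. A small, self-contained illustration of the call, using a Go-allocated array in place of C++ memory:

```go
package main

import (
	"fmt"
	"unsafe"
)

func main() {
	// Pretend this backing array was filled in by a C/C++ library.
	backing := [4]float32{0.1, 0.5, 0.9, 0.2}

	// A raw pointer to the first element, as a C API would hand back.
	ptr := unsafe.Pointer(&backing[0])

	// unsafe.Slice rebuilds a []float32 of length 4 over that memory.
	segs := unsafe.Slice((*float32)(ptr), 4)
	fmt.Println(segs) // [0.1 0.5 0.9 0.2]

	// The "unsafeptr" vet warning mentioned in the comment typically concerns
	// converting a uintptr back into a pointer while the referenced stack may
	// move; memory owned by C++ is not subject to Go stack copying.
}
```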
2 changes: 1 addition & 1 deletion core/cli/federated.go
@@ -12,7 +12,7 @@ type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
RandomWorker bool `env:"LOCALAI_RANDOM_WORKER,RANDOM_WORKER" default:"false" help:"Select a random worker from the pool" group:"p2p"`
-Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances." group:"p2p"`
+Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances." group:"p2p"`
TargetWorker string `env:"LOCALAI_TARGET_WORKER,TARGET_WORKER" help:"Target worker to run the federated server on" group:"p2p"`
}

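The struct tags being corrected in these CLI types (`env`, `default`, `help`, `group`) are the declarative flag/environment binding style of Go CLI parsers such as alecthomas/kong, which LocalAI's CLI layer appears to use. A minimal sketch of how such a tagged struct is parsed; the fields shown are illustrative:

```go
package main

import (
	"fmt"

	"github.com/alecthomas/kong"
)

// cli mirrors the tagged-struct style from the diff with illustrative fields.
var cli struct {
	Address   string `env:"LOCALAI_ADDRESS" default:":8080" help:"Bind address for the API server"`
	NetworkID string `env:"LOCALAI_P2P_NETWORK_ID" help:"Network ID for grouping a set of instances"`
}

func main() {
	// Values are resolved from flags, then environment variables, then defaults.
	kong.Parse(&cli)
	fmt.Printf("address=%s network=%s\n", cli.Address, cli.NetworkID)
}
```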
6 changes: 3 additions & 3 deletions core/cli/run.go
@@ -54,15 +54,15 @@ type RunCMD struct {
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
-UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
+UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliency against timing attacks." group:"hardening"`
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
DisableMetricsEndpoint bool `env:"LOCALAI_DISABLE_METRICS_ENDPOINT,DISABLE_METRICS_ENDPOINT" default:"false" help:"Disable the /metrics endpoint" group:"api"`
-HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
+HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overridden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
-Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
+Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
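The `LOCALAI_SUBTLE_KEY_COMPARISON` help text corrected above describes constant-time API-key comparison. A hedged sketch of what that generally looks like in Go with `crypto/subtle`, not necessarily LocalAI's exact implementation:

```go
package main

import (
	"crypto/subtle"
	"fmt"
)

// keysEqual compares an incoming API key against the expected one.
// subtle.ConstantTimeCompare takes the same time regardless of where the first
// mismatching byte is, which is the timing-attack resiliency the help text
// mentions; a plain == can short-circuit and leak the mismatch position.
// (It still returns immediately when the lengths differ.)
func keysEqual(provided, expected string) bool {
	return subtle.ConstantTimeCompare([]byte(provided), []byte(expected)) == 1
}

func main() {
	fmt.Println(keysEqual("secret-key", "secret-key")) // true
	fmt.Println(keysEqual("guess", "secret-key"))      // false
}
```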
2 changes: 1 addition & 1 deletion core/cli/worker/worker_p2p.go
@@ -22,7 +22,7 @@ type P2P struct {
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
-Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
+Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"`
}

func (r *P2P) Run(ctx *cliContext.Context) error {
8 changes: 4 additions & 4 deletions core/http/app_test.go
@@ -75,7 +75,7 @@ func getModelStatus(url string) (response map[string]interface{}) {
// Unmarshal the response into a map[string]interface{}
err = json.Unmarshal(body, &response)
if err != nil {
-fmt.Println("Error unmarshaling JSON response:", err)
+fmt.Println("Error unmarshalling JSON response:", err)
return
}
return
@@ -131,7 +131,7 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
// Unmarshal the response into a map[string]interface{}
err = json.Unmarshal(body, &response)
if err != nil {
-fmt.Println("Error unmarshaling JSON response:", err)
+fmt.Println("Error unmarshalling JSON response:", err)
return
}
return
@@ -629,7 +629,7 @@ var _ = Describe("API test", func() {
},
"unit": {
Type: jsonschema.String,
-Enum: []string{"celcius", "fahrenheit"},
+Enum: []string{"celsius", "fahrenheit"},
},
},
Required: []string{"location"},
@@ -646,7 +646,7 @@ var _ = Describe("API test", func() {
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
-Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
+Expect(res["unit"]).To(Equal("celsius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})

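The test helpers whose log message is corrected here decode arbitrary JSON into a `map[string]interface{}`. A standalone example of that decode-and-inspect pattern:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	body := []byte(`{"unit": "celsius", "location": "San Francisco, CA"}`)

	// Decode into a generic map when the response shape is not fixed.
	var response map[string]interface{}
	if err := json.Unmarshal(body, &response); err != nil {
		fmt.Println("Error unmarshalling JSON response:", err)
		return
	}
	fmt.Println(response["unit"], response["location"]) // celsius San Francisco, CA
}
```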
4 changes: 2 additions & 2 deletions core/http/endpoints/localai/system.go
@@ -7,11 +7,11 @@ import (
"github.com/mudler/LocalAI/pkg/model"
)

-// SystemInformations returns the system informations
+// SystemInformation returns the system information
// @Summary Show the LocalAI instance information
// @Success 200 {object} schema.SystemInformationResponse "Response"
// @Router /system [get]
-func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
+func SystemInformation(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
availableBackends := []string{}
loadedModels := ml.ListLoadedModels()
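The renamed `SystemInformation` endpoint follows the handler-factory shape used across the HTTP layer: dependencies are captured in a closure that returns a Fiber handler. A rough sketch of that shape; the response fields are placeholders, not the real `SystemInformationResponse` schema:

```go
package main

import "github.com/gofiber/fiber/v2"

// systemInfo is a placeholder for the real response schema.
type systemInfo struct {
	Backends     []string `json:"backends"`
	LoadedModels []string `json:"loaded_models"`
}

// systemInformation captures its dependencies (here just a static backend list)
// and returns a handler, mirroring the factory style of the endpoint in the diff.
func systemInformation(backends []string) fiber.Handler {
	return func(c *fiber.Ctx) error {
		return c.JSON(systemInfo{Backends: backends, LoadedModels: []string{}})
	}
}

func main() {
	app := fiber.New()
	app.Get("/system", systemInformation([]string{"llama-cpp", "whisper"}))
	_ = app.Listen(":8080")
}
```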
2 changes: 1 addition & 1 deletion core/http/endpoints/openai/realtime.go
@@ -799,7 +799,7 @@ func commitUtterance(ctx context.Context, utt []byte, cfg *config.ModelConfig, e
}

if !session.TranscriptionOnly {
-sendNotImplemented(c, "Commiting items to the conversation not implemented")
+sendNotImplemented(c, "Committing items to the conversation not implemented")
}

// TODO: Commit the audio and/or transcribed text to the conversation
4 changes: 2 additions & 2 deletions core/http/endpoints/openai/realtime_model.go
@@ -35,7 +35,7 @@ type wrappedModel struct {

// anyToAnyModel represent a model which supports Any-to-Any operations
// We have to wrap this out as well because we want to load two models one for VAD and one for the actual model.
-// In the future there could be models that accept continous audio input only so this design will be useful for that
+// In the future there could be models that accept continuous audio input only so this design will be useful for that
type anyToAnyModel struct {
LLMConfig *config.ModelConfig
LLMClient grpcClient.Backend
@@ -211,7 +211,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model

log.Debug().Msg("Loading a wrapped model")

-// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
+// Otherwise we want to return a wrapped model, which is a "virtual" model that reuses other models to perform operations
cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath)
if err != nil {

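The comments fixed in this file describe `wrappedModel` as a "virtual" model that reuses separate models (one for VAD, one for the actual generation) behind a single interface. A schematic, self-contained sketch of that composition idea; all interfaces and names here are illustrative:

```go
package main

import "fmt"

// Minimal stand-ins for the two capabilities the comment mentions.
type vadModel interface{ HasSpeech(audio []byte) bool }
type chatModel interface{ Reply(prompt string) string }

// wrappedModel presents the underlying models as one "virtual" model,
// in the spirit of the design the fixed comments describe.
type wrappedModel struct {
	vad vadModel
	llm chatModel
}

func (w *wrappedModel) Respond(audio []byte, transcript string) string {
	if !w.vad.HasSpeech(audio) {
		return "" // nothing to do on silence
	}
	return w.llm.Reply(transcript)
}

// Trivial fakes so the sketch runs end to end.
type fakeVAD struct{}
type fakeLLM struct{}

func (fakeVAD) HasSpeech([]byte) bool { return true }
func (fakeLLM) Reply(p string) string { return "you said: " + p }

func main() {
	m := &wrappedModel{vad: fakeVAD{}, llm: fakeLLM{}}
	fmt.Println(m.Respond([]byte{1, 2, 3}, "hello there"))
}
```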
2 changes: 1 addition & 1 deletion core/http/endpoints/openai/types/realtime.go
@@ -1,6 +1,6 @@
package types

-// Most of this file was coppied from https://github.com/WqyJh/go-openai-realtime
+// Most of this file was copied from https://github.com/WqyJh/go-openai-realtime
// Copyright (c) 2024 Qiying Wang MIT License

import (
2 changes: 1 addition & 1 deletion docs/content/docs/advanced/advanced-usage.md
@@ -614,7 +614,7 @@ A list of the environment variable that tweaks parallelism is the following:
```
### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
-### This actually controls wether a backend can process multiple requests or not.
+### This actually controls whether a backend can process multiple requests or not.
# PYTHON_GRPC_MAX_WORKERS=1

### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
2 changes: 1 addition & 1 deletion docs/content/docs/faq.md
@@ -14,7 +14,7 @@ Here are answers to some of the most common questions.

### How do I get models?

-Most gguf-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in lama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=gguf, or models from gpt4all are compatible too: https://github.com/nomic-ai/gpt4all.
+Most gguf-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=gguf, or models from gpt4all are compatible too: https://github.com/nomic-ai/gpt4all.

### Benchmarking LocalAI and llama.cpp shows different results!

4 changes: 2 additions & 2 deletions docs/content/docs/features/image-generation.md
@@ -201,7 +201,7 @@ Pipelines types available:

##### Advanced: Additional parameters

-Additional arbitrarly parameters can be specified in the option field in key/value separated by `:`:
+Additional arbitrarily parameters can be specified in the option field in key/value separated by `:`:

```yaml
name: animagine-xl
@@ -210,7 +210,7 @@ options:
- "cfg_scale:6"
```

-**Note**: There is no complete parameter list. Any parameter can be passed arbitrarly and is passed to the model directly as argument to the pipeline. Different pipelines/implementations support different parameters.
+**Note**: There is no complete parameter list. Any parameter can be passed arbitrarily and is passed to the model directly as argument to the pipeline. Different pipelines/implementations support different parameters.

The example above, will result in the following python code when generating images:

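As a purely illustrative aside on the `key:value` options documented above (the Python snippet the docs refer to falls outside this hunk's context, and the backend that actually handles these options is Python): splitting such option strings into keyword arguments is the general idea sketched below in Go, with example option names.

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Options in the "key:value" form shown in the YAML above ("scheduler" is just an example).
	options := []string{"cfg_scale:6", "scheduler:euler_a"}

	kwargs := map[string]string{}
	for _, opt := range options {
		// SplitN(..., 2) keeps any further ":" inside the value intact.
		parts := strings.SplitN(opt, ":", 2)
		if len(parts) == 2 {
			kwargs[parts[0]] = parts[1]
		}
	}
	fmt.Println(kwargs) // map[cfg_scale:6 scheduler:euler_a]
}
```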
2 changes: 1 addition & 1 deletion docs/content/docs/features/text-generation.md
@@ -262,7 +262,7 @@ The backend will automatically download the required files in order to run the m
- `OVModelForCausalLM` requires OpenVINO IR [Text Generation](https://huggingface.co/models?library=openvino&pipeline_tag=text-generation) models from Hugging face
- `OVModelForFeatureExtraction` works with any Safetensors Transformer [Feature Extraction](https://huggingface.co/models?pipeline_tag=feature-extraction&library=transformers,safetensors) model from Huggingface (Embedding Model)

-Please note that streaming is currently not implemente in `AutoModelForCausalLM` for Intel GPU.
+Please note that streaming is currently not implemented in `AutoModelForCausalLM` for Intel GPU.
AMD GPU support is not implemented.
Although AMD CPU is not officially supported by OpenVINO there are reports that it works: YMMV.

2 changes: 1 addition & 1 deletion docs/content/docs/whats-new.md
@@ -90,7 +90,7 @@ It is now possible for single-devices with one GPU to specify `--single-active-b

#### Resources management

-Thanks to the continous community efforts (another cool contribution from {{< github "dave-gray101" >}} ) now it's possible to shutdown a backend programmatically via the API.
+Thanks to the continuous community efforts (another cool contribution from {{< github "dave-gray101" >}} ) now it's possible to shutdown a backend programmatically via the API.
There is an ongoing effort in the community to better handling of resources. See also the [🔥Roadmap](https://localai.io/#-hot-topics--roadmap).

#### New how-to section
2 changes: 1 addition & 1 deletion docs/static/install.sh
@@ -189,7 +189,7 @@ else
fi
THREADS=${THREADS:-$procs}
LATEST_VERSION=$(curl -s "https://github.com/api/repos/mudler/LocalAI/releases/latest" | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
-LOCALAI_VERSION="${LOCALAI_VERSION:-$LATEST_VERSION}" #changed due to VERSION beign already defined in Fedora 42 Cloud Edition
+LOCALAI_VERSION="${LOCALAI_VERSION:-$LATEST_VERSION}" #changed due to VERSION being already defined in Fedora 42 Cloud Edition
MODELS_PATH=${MODELS_PATH:-/usr/share/local-ai/models}

