abetlen · abetlen · Apr 7, 2023 · Apr 5, 2023 · Apr 5, 2023 · Apr 5, 2023
diff --git a/.gitignore b/.gitignore
@@ -163,4 +163,4 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
diff --git a/examples/high_level_api/fastapi_server.py b/examples/high_level_api/fastapi_server.py
@@ -27,10 +27,10 @@
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 2048
-    n_threads: int = os.cpu_count() or 1
+    n_batch: int = 8
+    n_threads: int = max((os.cpu_count() or 1) // 2, 1)  # Half the reported CPUs, at least 1; os.cpu_count() may return None.
     f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False  # Off by default: mlock causes a silent failure on platforms that don't support it (e.g. Windows).
     embedding: bool = True
     last_n_tokens_size: int = 64
 

diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
@@ -27,10 +27,10 @@
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 2048
-    n_threads: int = os.cpu_count() or 1
+    n_batch: int = 8
+    n_threads: int = max((os.cpu_count() or 1) // 2, 1)  # Half the reported CPUs, at least 1; os.cpu_count() may return None.
     f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False  # Off by default: mlock causes a silent failure on platforms that don't support it (e.g. Windows).
     embedding: bool = True
     last_n_tokens_size: int = 64
 

diff --git a/setup.py b/setup.py
@@ -19,6 +19,7 @@
     entry_points={"console_scripts": ["llama_cpp.server=llama_cpp.server:main"]},
     install_requires=[
         "typing-extensions>=4.5.0",
+        "pydantic==1.10.7",
     ],
     extras_require={
         "server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"],