diff --git a/.gitignore b/.gitignore
index fd64c09b3..d2d45ca1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,8 @@ models/
 __pycache__/
 *.py[cod]
 *$py.class
+.DS_STORE
+
 # C extensions
 *.so
 
@@ -31,6 +33,7 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+libllama.dylib
 
 # PyInstaller
 # Usually these files are written by a python script from a template
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 1049e44a1..f2de1de24 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -599,13 +599,13 @@ def _convert_text_completion_chunks_to_chat(
 
     def create_chat_completion(
         self,
         messages: List[ChatCompletionMessage],
-        temperature: float = 0.8,
-        top_p: float = 0.95,
-        top_k: int = 40,
+        temperature: float = 0.72,
+        top_p: float = 0.73,
+        top_k: int = 0,
         stream: bool = False,
         stop: List[str] = [],
-        max_tokens: int = 128,
-        repeat_penalty: float = 1.1,
+        max_tokens: int = 256,
+        repeat_penalty: float = 1/0.85,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         """Generate a chat completion from a list of messages.
@@ -627,7 +627,7 @@ def create_chat_completion(
             f'{message["role"]} {message.get("user", "")}: {message["content"]}'
             for message in messages
         )
-        PROMPT = f" \n\n### Instructions:{instructions}\n\n### Inputs:{chat_history}\n\n### Response:\nassistant: "
+        PROMPT = f"### Instructions:\n{instructions}\n\n{chat_history}\n\n### Response:\nassistant:"
         PROMPT_STOP = ["###", "\nuser: ", "\nassistant: ", "\nsystem: "]
         completion_or_chunks = self(
             prompt=PROMPT,
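
A minimal sketch (not part of the diff) of how the reworked PROMPT template renders for a small conversation. The chat_history expression mirrors the one visible in the hunk; how `instructions` is derived is outside the visible context, so taking it from system messages here is an assumption.

# Sketch only: reproduces the new PROMPT f-string from the second hunk.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Mirrors the chat_history construction shown in the hunk's context lines.
chat_history = "\n".join(
    f'{message["role"]} {message.get("user", "")}: {message["content"]}'
    for message in messages
)

# Assumption: instructions come from system messages (not shown in the hunk).
instructions = "\n".join(
    message["content"] for message in messages if message["role"] == "system"
)

PROMPT = f"### Instructions:\n{instructions}\n\n{chat_history}\n\n### Response:\nassistant:"
print(PROMPT)
# ### Instructions:
# You are a helpful assistant.
#
# system : You are a helpful assistant.
# user : What is the capital of France?
#
# ### Response:
# assistant:

On the default changes themselves: the new repeat_penalty of 1/0.85 evaluates to roughly 1.18, a stronger penalty than the previous 1.1, and top_k=0 effectively disables top-k filtering in llama.cpp's sampler, leaving temperature and top_p to shape the distribution.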