wrapping max tokens and temperature (#14)
* wrapping max tokens and temperature

---------

Authored-by: Eyal Paz <eyalp700@gmail.com>
uripeled2 committed Jun 3, 2023
1 parent ca3d063 commit 11c5605
Showing 12 changed files with 60 additions and 40 deletions.
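
The gist of the change: every API client now exposes `max_tokens` and `temperature` directly on `text_completion` instead of relying on provider-specific kwargs. The sketch below illustrates that shape only and is not the library's code; the class name is a placeholder.

```python
from abc import ABC, abstractmethod


class BaseLLMAPIClientSketch(ABC):
    """Placeholder class; only the text_completion signature mirrors the diff."""

    @abstractmethod
    async def text_completion(self, prompt: str, model: str | None = None,
                              max_tokens: int | None = None,
                              temperature: float | None = None, **kwargs) -> list[str]:
        # each concrete client translates these into its provider's request format
        raise NotImplementedError()
```
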
16 changes: 10 additions & 6 deletions README.md
@@ -46,7 +46,8 @@ class BaseLLMAPIClient(BaseLLMClient, ABC):
...

@abstractmethod
async def text_completion(self, prompt: str, model: str | None = None, **kwargs) -> list[str]:
async def text_completion(self, prompt: str, model: str | None = None, max_tokens: int | None = None,
temperature: float | None = None, **kwargs) -> list[str]:
raise NotImplementedError()

async def embedding(self, text: str, model: str | None = None, **kwargs) -> list[float]:
@@ -130,7 +131,7 @@ async def main():
api_key=OPENAI_API_KEY)

await llm_client.text_completion(prompt="This is indeed a test")
await llm_client.text_completion(prompt="This is indeed a test", max_length=50)
await llm_client.text_completion(prompt="This is indeed a test", max_tokens=50)


# Or if you don't want to use async
@@ -141,7 +142,7 @@ with SyncLLMAPIClientFactory() as llm_api_client_factory:
api_key=OPENAI_API_KEY)

llm_client.text_completion(prompt="This is indeed a test")
llm_client.text_completion(prompt="This is indeed a test", max_length=50)
llm_client.text_completion(prompt="This is indeed a test", max_tokens=50)
```
Local model
```python
@@ -158,7 +159,7 @@ async def main():
llm_client = LocalClient(LocalClientConfig(model, tokenizer, os.environ["TENSORS_TYPE"], os.environ["DEVICE"]))

await llm_client.text_completion(prompt="This is indeed a test")
await llm_client.text_completion(prompt="This is indeed a test", max_length=50)
await llm_client.text_completion(prompt="This is indeed a test", max_tokens=50)


# Or if you don't want to use async
@@ -174,7 +175,7 @@ llm_client = LocalClient(LocalClientConfig(model, tokenizer, os.environ["TENSORS
llm_client = async_to_sync.methods(llm_client)

llm_client.text_completion(prompt="This is indeed a test")
llm_client.text_completion(prompt="This is indeed a test", max_length=50)
llm_client.text_completion(prompt="This is indeed a test", max_tokens=50)
```

## Contributing
@@ -195,7 +196,10 @@ Contributions are welcome! Please check out the todos below, and feel free to op
- [ ] more
- [ ] Add contributing guidelines
- [ ] Create an easy way to run multiple LLMs in parallel with the same prompts
- [ ] Convert common models parameter (e.g. temperature, max_tokens, etc.)
- [x] Convert common models parameter
- [x] temperature
- [x] max_tokens
- [ ] more

### Development
To install the package in development mode, run the following command:
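
To make the README's new calls concrete (separate from the truncated Development section above), here is a self-contained, runnable sketch using a stand-in client class rather than the real library; only the call shape with `max_tokens` and `temperature` reflects the diff.

```python
import asyncio


class EchoLLMClient:
    """Stand-in client, not part of llm-client; it just echoes its arguments."""

    async def text_completion(self, prompt: str, model: str | None = None,
                              max_tokens: int | None = None,
                              temperature: float | None = None, **kwargs) -> list[str]:
        return [f"{prompt} (max_tokens={max_tokens}, temperature={temperature})"]


async def main():
    llm_client = EchoLLMClient()
    # mirrors the README calls above, now passing both wrapped parameters
    print(await llm_client.text_completion(prompt="This is indeed a test",
                                           max_tokens=50, temperature=0.2))


asyncio.run(main())
```
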
2 changes: 1 addition & 1 deletion llm_client/__init__.py
@@ -1,4 +1,4 @@
__version__ = "0.2.0"
__version__ = "0.3.0"

from llm_client.base_llm_client import BaseLLMClient

4 changes: 3 additions & 1 deletion llm_client/llm_api_client/ai21_client.py
@@ -20,9 +20,11 @@ def __init__(self, config: LLMAPIClientConfig):
self._base_url = BASE_URL
self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key

async def text_completion(self, prompt: str, model: str | None = None, **kwargs) -> list[str]:
async def text_completion(self, prompt: str, model: str | None = None, max_tokens : int = 16, temperature : float = 0.7, **kwargs) -> list[str]:
model = model or self._default_model
kwargs[PROMPT_KEY] = prompt
kwargs["maxTokens"] = kwargs.pop("maxTokens", max_tokens)
kwargs["temperature"] = temperature
response = await self._session.post(self._base_url + model + "/" + COMPLETE_PATH,
json=kwargs,
headers=self._headers,
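
The AI21 wrapper maps the generic `max_tokens` argument onto the provider's `maxTokens` body field; the `kwargs.pop` idiom means an explicitly passed `maxTokens` kwarg still wins over the wrapped default. A stand-alone sketch of just that payload mapping (hypothetical helper, no HTTP):

```python
def build_ai21_payload(prompt: str, max_tokens: int = 16,
                       temperature: float = 0.7, **kwargs) -> dict:
    """Hypothetical helper: mirrors only the request-body mapping in the diff."""
    kwargs["prompt"] = prompt
    # kwargs.pop lets an explicitly passed maxTokens override the wrapped default
    kwargs["maxTokens"] = kwargs.pop("maxTokens", max_tokens)
    kwargs["temperature"] = temperature
    return kwargs


assert build_ai21_payload("hi")["maxTokens"] == 16                # wrapped default
assert build_ai21_payload("hi", maxTokens=10)["maxTokens"] == 10  # explicit kwarg wins
```
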
9 changes: 5 additions & 4 deletions llm_client/llm_api_client/aleph_alpha_client.py
@@ -24,13 +24,14 @@ def __init__(self, config: LLMAPIClientConfig):
self._base_url = BASE_URL
self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key

async def text_completion(self, prompt: str, model: str | None = None, max_tokens: int | None = None, **kwargs) ->\
async def text_completion(self, prompt: str, model: str | None = None, max_tokens : int | None= None, temperature : float = 0 , **kwargs) ->\
list[str]:
self._set_model_in_kwargs(kwargs, model)
if max_tokens is None:
raise ValueError("max_tokens must be specified")
self._set_model_in_kwargs(kwargs, model)
kwargs[PROMPT_KEY] = prompt
kwargs[MAX_TOKENS_KEY] = max_tokens
kwargs["maximum_tokens"] = kwargs.pop("maximum_tokens", max_tokens)
kwargs["temperature"] = temperature
response = await self._session.post(self._base_url + COMPLETE_PATH,
json=kwargs,
headers=self._headers,
@@ -39,7 +40,7 @@ async def text_completion(self, prompt: str, model: str | None = None, max_token
completions = response_json[COMPLETIONS_KEY]
return [completion[TEXT_KEY] for completion in completions]

async def embedding(self, text: str, model: str | None = None, representation: str = REPRESENTATION_DEFAULT_VALUE,
async def embedding(self, text: str, model: str | None = None,representation: str = REPRESENTATION_DEFAULT_VALUE,
**kwargs) -> list[float]:
self._set_model_in_kwargs(kwargs, model)
kwargs[REPRESENTATION_KEY] = representation
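
For Aleph Alpha the wrapped argument is renamed to the provider's `maximum_tokens` field, and `max_tokens` remains mandatory. A hypothetical helper sketching only the validation and renaming step:

```python
def build_aleph_alpha_payload(prompt: str, max_tokens: int | None = None,
                              temperature: float = 0, **kwargs) -> dict:
    """Hypothetical helper: mirrors the validation and renaming in the diff."""
    if max_tokens is None:
        raise ValueError("max_tokens must be specified")
    kwargs["prompt"] = prompt
    # an explicit maximum_tokens kwarg takes precedence over the wrapped argument
    kwargs["maximum_tokens"] = kwargs.pop("maximum_tokens", max_tokens)
    kwargs["temperature"] = temperature
    return kwargs


payload = build_aleph_alpha_payload("These are a few of my favorite", max_tokens=10)
assert payload["maximum_tokens"] == 10 and payload["temperature"] == 0
```
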
11 changes: 6 additions & 5 deletions llm_client/llm_api_client/anthropic_client.py
@@ -3,7 +3,6 @@
from llm_client.llm_api_client.base_llm_api_client import BaseLLMAPIClient, LLMAPIClientConfig
from llm_client.consts import PROMPT_KEY


COMPLETE_PATH = "complete"
BASE_URL = "https://api.anthropic.com/v1/"
COMPLETIONS_KEY = "completion"
@@ -21,13 +20,15 @@ def __init__(self, config: LLMAPIClientConfig):
self._headers[ACCEPT_HEADER] = ACCEPT_VALUE
self._headers[AUTH_HEADER] = self._api_key

async def text_completion(self, prompt: str, model: str | None = None, max_tokens: int | None = None, **kwargs) ->\
async def text_completion(self, prompt: str, model: str | None = None, max_tokens: int | None = None, temperature: float = 1,
**kwargs) -> \
list[str]:
if max_tokens is None:
raise ValueError("max_tokens must be specified")
if max_tokens is None and kwargs.get(MAX_TOKENS_KEY) is None:
raise ValueError(f"max_tokens or {MAX_TOKENS_KEY} must be specified")
self._set_model_in_kwargs(kwargs, model)
kwargs[PROMPT_KEY] = prompt
kwargs[MAX_TOKENS_KEY] = max_tokens
kwargs[MAX_TOKENS_KEY] = kwargs.pop(MAX_TOKENS_KEY, max_tokens)
kwargs["temperature"] = temperature
response = await self._session.post(self._base_url + COMPLETE_PATH,
json=kwargs,
headers=self._headers,
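
The Anthropic wrapper also requires a token limit, but the check is relaxed so it may arrive either through the wrapped `max_tokens` argument or directly under the provider's key (`MAX_TOKENS_KEY`, whose literal value is not visible in this excerpt). A sketch of that either-or validation, with a placeholder standing in for the real key:

```python
MAX_TOKENS_KEY = "max_tokens_placeholder"  # the real constant's value is not shown in this diff


def build_anthropic_payload(prompt: str, max_tokens: int | None = None,
                            temperature: float = 1, **kwargs) -> dict:
    """Hypothetical helper: mirrors the either-or validation in the diff."""
    if max_tokens is None and kwargs.get(MAX_TOKENS_KEY) is None:
        raise ValueError(f"max_tokens or {MAX_TOKENS_KEY} must be specified")
    kwargs["prompt"] = prompt
    kwargs[MAX_TOKENS_KEY] = kwargs.pop(MAX_TOKENS_KEY, max_tokens)
    kwargs["temperature"] = temperature
    return kwargs


build_anthropic_payload("hi", max_tokens=10)           # wrapped argument
build_anthropic_payload("hi", **{MAX_TOKENS_KEY: 10})  # provider key passed directly
```
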
3 changes: 2 additions & 1 deletion llm_client/llm_api_client/base_llm_api_client.py
@@ -29,7 +29,8 @@ def __init__(self, config: LLMAPIClientConfig):
self._headers: dict[str, str] = config.headers

@abstractmethod
async def text_completion(self, prompt: str, model: str | None = None, **kwargs) -> list[str]:
async def text_completion(self, prompt: str, model: str | None = None, max_tokens: int | None = None,
temperature: float | None = None, **kwargs) -> list[str]:
raise NotImplementedError()

async def embedding(self, text: str, model: str | None = None, **kwargs) -> list[float]:
7 changes: 6 additions & 1 deletion llm_client/llm_api_client/huggingface_client.py
@@ -12,6 +12,8 @@
CONST_SLASH = '/'
EMPTY_STR = ''
NEWLINE = '\n'
TEMPERATURE_KEY = "temperature"
TOKENS_KEY = "max_length"


class HuggingFaceClient(BaseLLMAPIClient):
@@ -23,9 +25,12 @@ def __init__(self, config: LLMAPIClientConfig):
self._default_model = DEFAULT_MODEL
self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key

async def text_completion(self, prompt: str, model: str | None = None, **kwargs) -> list[str]:
async def text_completion(self, prompt: str, max_tokens: int | None = None, temperature: float = 1.0,
model: str | None = None, **kwargs) -> list[str]:
model = model or self._default_model
kwargs[INPUT_KEY] = prompt
kwargs[TEMPERATURE_KEY] = temperature
kwargs[TOKENS_KEY] = kwargs.pop(TOKENS_KEY, max_tokens)
response = await self._session.post(self._base_url + model + CONST_SLASH,
json=kwargs,
headers=self._headers,
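
For the Hugging Face client the wrapped `max_tokens` is forwarded as `max_length` and `temperature` is passed through unchanged, so with the defaults the payload carries `"max_length": None`, as the updated Hugging Face test further down asserts. A hypothetical helper showing just the mapping:

```python
def build_huggingface_payload(prompt: str, max_tokens: int | None = None,
                              temperature: float = 1.0, **kwargs) -> dict:
    """Hypothetical helper: key names mirror those used in the diff."""
    kwargs["inputs"] = prompt
    kwargs["temperature"] = temperature
    kwargs["max_length"] = kwargs.pop("max_length", max_tokens)
    return kwargs


# with the defaults, max_length is sent as None (see the updated test below)
assert build_huggingface_payload("who is kobe bryant") == {
    "inputs": "who is kobe bryant", "temperature": 1.0, "max_length": None}
```
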
10 changes: 8 additions & 2 deletions llm_client/llm_api_client/openai_client.py
@@ -35,15 +35,21 @@ def __init__(self, config: LLMAPIClientConfig):
openai.aiosession.set(self._session)
self._client = openai

async def text_completion(self, prompt: str, model: str | None = None, **kwargs) -> list[str]:
async def text_completion(self, prompt: str, model: str | None = None,temperature: float = 0,
max_tokens: int = 16 , **kwargs) -> list[str]:
self._set_model_in_kwargs(kwargs, model)
kwargs[PROMPT_KEY] = prompt
kwargs["temperature"] = temperature
kwargs["max_tokens"] = max_tokens
completions = await self._client.Completion.acreate(headers=self._headers, **kwargs)
return [choice.text for choice in completions.choices]

async def chat_completion(self, messages: list[ChatMessage], model: str | None = None, **kwargs) -> list[str]:
async def chat_completion(self, messages: list[ChatMessage], temperature: float = 0,
max_tokens: int = 16 ,model: str | None = None, **kwargs) -> list[str]:
self._set_model_in_kwargs(kwargs, model)
kwargs["messages"] = [message.to_dict() for message in messages]
kwargs["temperature"] = temperature
kwargs["max_tokens"] = max_tokens
completions = await self._client.ChatCompletion.acreate(headers=self._headers, **kwargs)
return [choice.message.content for choice in completions.choices]

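
The OpenAI wrapper forwards `temperature` (default 0) and `max_tokens` (default 16) straight into the SDK call, so every request now carries explicit values for both, which is what the updated OpenAI tests below assert. A sketch of the keyword assembly only; no OpenAI call is made and the model name here is arbitrary:

```python
def build_openai_chat_kwargs(messages: list[dict], model: str,
                             temperature: float = 0, max_tokens: int = 16,
                             **kwargs) -> dict:
    """Hypothetical helper: shows only the keyword assembly, not the SDK call."""
    kwargs["model"] = model
    kwargs["messages"] = messages
    kwargs["temperature"] = temperature
    kwargs["max_tokens"] = max_tokens
    return kwargs


# every request now carries explicit temperature and max_tokens defaults
assert build_openai_chat_kwargs([{"role": "user", "content": "Hello!"}],
                                model="some-model") == {
    "model": "some-model",
    "messages": [{"role": "user", "content": "Hello!"}],
    "temperature": 0,
    "max_tokens": 16,
}
```
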
10 changes: 5 additions & 5 deletions tests/llm_api_client/ai21_client/test_ai21.py
@@ -29,8 +29,8 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, url):
' things!\n\nI love entertaining, entertaining and decorating my home, entertaining clients, entertaining '
'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
mock_aioresponse.assert_called_once_with(url, method='POST',
headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
json={'prompt': 'These are a few of my favorite'},
headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key },
json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7 },
raise_for_status=True)


@@ -49,7 +49,7 @@ async def test_text_completion__return_multiple_completions(mock_aioresponse, ll
]
mock_aioresponse.assert_called_once_with(url, method='POST',
headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
json={'prompt': 'These are a few of my favorite'},
json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7 },
raise_for_status=True)


@@ -69,7 +69,7 @@ async def test_text_completion__override_model(mock_aioresponse, llm_client):
'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
mock_aioresponse.assert_called_once_with(url, method='POST',
headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
json={'prompt': 'These are a few of my favorite'},
json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7 },
raise_for_status=True)


@@ -87,7 +87,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, url):
'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
mock_aioresponse.assert_called_once_with(url, method='POST',
headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
json={'prompt': 'These are a few of my favorite', 'max_tokens': 10},
json={'prompt': 'These are a few of my favorite', "maxTokens" : 10, "temperature" : 0.7 },
raise_for_status=True)


@@ -31,7 +31,7 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, complete_ur
headers={AUTH_HEADER: llm_client._api_key,
ACCEPT_HEADER: ACCEPT_VALUE},
json={PROMPT_KEY: 'These are a few of my favorite',
MAX_TOKENS_KEY: 10,
MAX_TOKENS_KEY: 10, "temperature": 1,
MODEL_KEY: llm_client._default_model},
raise_for_status=True)

Expand All @@ -58,7 +58,7 @@ async def test_text_completion__override_model(mock_aioresponse, llm_client, com
headers={AUTH_HEADER: llm_client._api_key,
ACCEPT_HEADER: ACCEPT_VALUE},
json={PROMPT_KEY: 'These are a few of my favorite',
MAX_TOKENS_KEY: 10,
MAX_TOKENS_KEY: 10, "temperature": 1,
MODEL_KEY: new_model_name},
raise_for_status=True)

4 changes: 2 additions & 2 deletions tests/llm_api_client/huggingface_client/test_huggingface.py
@@ -28,7 +28,7 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, url):
assert actual == ['Kobe Bryant is a retired professional basketball player who played for the Los Angeles Lakers of']
mock_aioresponse.assert_called_once_with(url, method='POST',
headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
json={'inputs': 'who is kobe bryant'},
json={'inputs': 'who is kobe bryant',"max_length": None, "temperature": 1.0},
raise_for_status=True)


Expand All @@ -44,7 +44,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, url):
assert actual == ['Kobe Bryant is a retired professional basketball player who played for the Los Angeles Lakers of']
mock_aioresponse.assert_called_once_with(url, method='POST',
headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
json={'inputs': 'who is kobe bryant','max_tokens' : 10},
json={'inputs': 'who is kobe bryant',"max_length": 10, "temperature": 1.0},
raise_for_status=True)


20 changes: 10 additions & 10 deletions tests/llm_api_client/openai_client/test_openai.py
@@ -37,7 +37,7 @@ async def test_text_completion__sanity(openai_mock, open_ai_client, model_name):
openai_mock.Completion.acreate.assert_awaited_once_with(
model=model_name,
prompt="These are a few of my favorite",
headers={})
headers={},temperature=0,max_tokens=16)


@pytest.mark.asyncio
Expand All @@ -52,7 +52,7 @@ async def test_text_completion__return_multiple_completions(openai_mock, open_ai
openai_mock.Completion.acreate.assert_awaited_once_with(
model=model_name,
prompt="These are a few of my favorite",
headers={})
headers={},temperature=0,max_tokens=16)


@pytest.mark.asyncio
Expand All @@ -67,7 +67,7 @@ async def test_text_completion__override_model(openai_mock, open_ai_client, mode
openai_mock.Completion.acreate.assert_awaited_once_with(
model=new_model_name,
prompt="These are a few of my favorite",
headers={})
headers={},temperature=0,max_tokens=16)


@pytest.mark.asyncio
Expand All @@ -81,7 +81,7 @@ async def test_text_completion__with_kwargs(openai_mock, open_ai_client, model_n
openai_mock.Completion.acreate.assert_awaited_once_with(
model=model_name,
prompt="These are a few of my favorite",
max_tokens=10,
temperature=0,max_tokens=10,
headers={})


Expand All @@ -98,7 +98,7 @@ async def test_text_completion__with_headers(openai_mock, model_name):
openai_mock.Completion.acreate.assert_awaited_once_with(
model=model_name,
prompt="These are a few of my favorite",
headers={"header_name": "header_value"})
headers={"header_name": "header_value"},temperature=0,max_tokens=16)


@pytest.mark.asyncio
Expand All @@ -112,7 +112,7 @@ async def test_chat_completion__sanity(openai_mock, open_ai_client, model_name):
openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
model=model_name,
messages=[{'content': 'Hello!', 'role': 'user'}],
headers={})
headers={},temperature=0,max_tokens=16)


@pytest.mark.asyncio
Expand All @@ -127,7 +127,7 @@ async def test_chat_completion__return_multiple_completions(openai_mock, open_ai
openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
model=model_name,
messages=[{'content': 'Hello!', 'role': 'user'}],
headers={})
headers={},temperature=0,max_tokens=16)


@pytest.mark.asyncio
Expand All @@ -142,7 +142,7 @@ async def test_chat_completion__override_model(openai_mock, open_ai_client, mode
openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
model=new_model_name,
messages=[{'content': 'Hello!', 'role': 'user'}],
headers={})
headers={},temperature=0,max_tokens=16)


@pytest.mark.asyncio
Expand All @@ -157,7 +157,7 @@ async def test_chat_completion__with_kwargs(openai_mock, open_ai_client, model_n
model=model_name,
messages=[{'content': 'Hello!', 'role': 'user'}],
max_tokens=10,
headers={})
headers={},temperature=0)


@pytest.mark.asyncio
Expand All @@ -173,7 +173,7 @@ async def test_chat_completion__with_headers(openai_mock, model_name):
openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
model=model_name,
messages=[{'content': 'Hello!', 'role': 'user'}],
headers={"header_name": "header_value"})
headers={"header_name": "header_value"},temperature=0,max_tokens=16)


@pytest.mark.asyncio
