Mirror of https://github.com/ollama/ollama-python.git
Synced 2026-02-05 07:06:51 -06:00

Compare commits

6 Commits

| SHA1 |
|---|
| dbccf192ac |
| 60e7b2f9ce |
| d1d704050b |
| 115792583e |
| 0008226fda |
| 9ddd5f0182 |
.github/workflows/publish.yaml (vendored, 2 changes)

@@ -13,7 +13,7 @@ jobs:
      id-token: write
      contents: write
    steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
      - uses: astral-sh/setup-uv@v5
        with:
.github/workflows/test.yaml (vendored, 4 changes)

@@ -10,7 +10,7 @@ jobs:
  test:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
      - uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

@@ -19,7 +19,7 @@ jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
      - uses: astral-sh/setup-uv@v5
        with:
README.md (77 changes)

@@ -50,6 +50,82 @@ for chunk in stream:

```python
for chunk in stream:
  print(chunk['message']['content'], end='', flush=True)
```

## Cloud Models

Run larger models by offloading to Ollama’s cloud while keeping your local workflow.

- Supported models: `deepseek-v3.1:671b-cloud`, `gpt-oss:20b-cloud`, `gpt-oss:120b-cloud`, `kimi-k2:1t-cloud`, `qwen3-coder:480b-cloud`, `kimi-k2-thinking`. See [Ollama Models - Cloud](https://ollama.com/search?c=cloud) for more information.

### Run via local Ollama

1) Sign in (one-time):

```
ollama signin
```

2) Pull a cloud model:

```
ollama pull gpt-oss:120b-cloud
```

3) Make a request:

```python
from ollama import Client

client = Client()

messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
]

for part in client.chat('gpt-oss:120b-cloud', messages=messages, stream=True):
  print(part.message.content, end='', flush=True)
```

### Cloud API (ollama.com)

Access cloud models directly by pointing the client at `https://ollama.com`.

1) Create an API key from [ollama.com](https://ollama.com/settings/keys), then set:

```
export OLLAMA_API_KEY=your_api_key
```

2) (Optional) List models available via the API:

```
curl https://ollama.com/api/tags
```

3) Generate a response via the cloud API:

```python
import os

from ollama import Client

client = Client(
  host='https://ollama.com',
  headers={'Authorization': 'Bearer ' + os.environ.get('OLLAMA_API_KEY')}
)

messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
]

for part in client.chat('gpt-oss:120b', messages=messages, stream=True):
  print(part.message.content, end='', flush=True)
```
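An equivalent asynchronous call is not part of this diff, but a minimal sketch using `AsyncClient` (assuming the same `OLLAMA_API_KEY` setup and an illustrative model name) looks like this:

```python
import asyncio
import os

from ollama import AsyncClient


async def main() -> None:
  # Same host/header wiring as the synchronous example above
  client = AsyncClient(
    host='https://ollama.com',
    headers={'Authorization': 'Bearer ' + os.environ['OLLAMA_API_KEY']},
  )
  async for part in await client.chat('gpt-oss:120b', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}], stream=True):
    print(part.message.content, end='', flush=True)


asyncio.run(main())
```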
## Custom client

A custom client can be created by instantiating `Client` or `AsyncClient` from `ollama`.
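The README's own custom-client example falls outside this hunk; a minimal sketch of the idea (host and header values are illustrative) is:

```python
from ollama import Client

# Point the client at a specific server and attach default headers
client = Client(
  host='http://localhost:11434',
  headers={'x-some-header': 'some-value'},
)
response = client.chat(model='gemma3', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
print(response.message.content)
```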
@@ -174,7 +250,6 @@ ollama.embed(model='gemma3', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])

```python
ollama.ps()
```

## Errors

Errors are raised if requests return an error status or if an error is detected while streaming.
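The error-handling example itself is outside this hunk; a minimal sketch of the pattern (model name illustrative) is:

```python
import ollama

model = 'does-not-yet-exist'

try:
  ollama.chat(model)
except ollama.ResponseError as e:
  # ResponseError carries the server's error message and HTTP status code
  print('Error:', e.error)
  if e.status_code == 404:
    ollama.pull(model)
```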
@@ -78,6 +78,12 @@ Configuration to use with an MCP client:

- [multimodal-chat.py](multimodal-chat.py)
- [multimodal-generate.py](multimodal-generate.py)

### Image Generation (Experimental) - Generate images with a model

> **Note:** Image generation is experimental and currently only available on macOS.

- [generate-image.py](generate-image.py)

### Structured Outputs - Generate structured outputs with a model

- [structured-outputs.py](structured-outputs.py)
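The linked structured-outputs example is not reproduced in this diff; a minimal sketch of the technique (model and schema are illustrative) passes a JSON schema as `format` and validates the reply:

```python
from pydantic import BaseModel

from ollama import chat


class FriendInfo(BaseModel):
  name: str
  age: int


response = chat(
  model='gemma3',
  messages=[{'role': 'user', 'content': 'Tell me about a friend named Ana who is 31 years old.'}],
  format=FriendInfo.model_json_schema(),
)
print(FriendInfo.model_validate_json(response.message.content))
```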
examples/chat-logprobs.py (new file, 31 lines)

```python
from typing import Iterable

import ollama


def print_logprobs(logprobs: Iterable[dict], label: str) -> None:
  print(f'\n{label}:')
  for entry in logprobs:
    token = entry.get('token', '')
    logprob = entry.get('logprob')
    print(f' token={token!r:<12} logprob={logprob:.3f}')
    for alt in entry.get('top_logprobs', []):
      if alt['token'] != token:
        print(f' alt -> {alt["token"]!r:<12} ({alt["logprob"]:.3f})')


messages = [
  {
    'role': 'user',
    'content': 'hi! be concise.',
  },
]

response = ollama.chat(
  model='gemma3',
  messages=messages,
  logprobs=True,
  top_logprobs=3,
)
print('Chat response:', response['message']['content'])
print_logprobs(response.get('logprobs', []), 'chat logprobs')
```
@@ -15,7 +15,8 @@ messages = [
  },
  {
    'role': 'assistant',
-    'content': 'The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures rarely dropping below freezing. The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.',
+    'content': """The weather in Tokyo is typically warm and humid during the summer months, with temperatures often exceeding 30°C (86°F). The city experiences a rainy season from June to September, with heavy rainfall and occasional typhoons. Winter is mild, with temperatures
+rarely dropping below freezing. The city is known for its high-tech and vibrant culture, with many popular tourist attractions such as the Tokyo Tower, Senso-ji Temple, and the bustling Shibuya district.""",
  },
]
examples/generate-image.py (new file, 18 lines)

```python
# Image generation is experimental and currently only available on macOS

import base64

from ollama import generate

prompt = 'a sunset over mountains'
print(f'Prompt: {prompt}')

for response in generate(model='x/z-image-turbo', prompt=prompt, stream=True):
  if response.image:
    # Final response contains the image
    with open('output.png', 'wb') as f:
      f.write(base64.b64decode(response.image))
    print('\nImage saved to output.png')
  elif response.total:
    # Progress update
    print(f'Progress: {response.completed or 0}/{response.total}', end='\r')
```
examples/generate-logprobs.py (new file, 24 lines)

```python
from typing import Iterable

import ollama


def print_logprobs(logprobs: Iterable[dict], label: str) -> None:
  print(f'\n{label}:')
  for entry in logprobs:
    token = entry.get('token', '')
    logprob = entry.get('logprob')
    print(f' token={token!r:<12} logprob={logprob:.3f}')
    for alt in entry.get('top_logprobs', []):
      if alt['token'] != token:
        print(f' alt -> {alt["token"]!r:<12} ({alt["logprob"]:.3f})')


response = ollama.generate(
  model='gemma3',
  prompt='hi! be concise.',
  logprobs=True,
  top_logprobs=3,
)
print('Generate response:', response['response'])
print_logprobs(response.get('logprobs', []), 'generate logprobs')
```
@@ -49,7 +49,7 @@ print('Query: ', query)

messages = [{'role': 'user', 'content': query}]
while True:
-  response = chat(model='deepseek-v3.1:671b-cloud', messages=messages, tools=[web_search, web_fetch], think=True)
+  response = chat(model='qwen3', messages=messages, tools=[web_search, web_fetch], think=True)
  if response.message.thinking:
    print('Thinking: ')
    print(response.message.thinking + '\n\n')
@@ -1,3 +1,4 @@
import contextlib
import ipaddress
import json
import os

@@ -75,7 +76,7 @@ from ollama._types import (
T = TypeVar('T')


-class BaseClient:
+class BaseClient(contextlib.AbstractContextManager, contextlib.AbstractAsyncContextManager):
  def __init__(
    self,
    client,

@@ -116,6 +117,12 @@ class BaseClient:
      **kwargs,
    )

  def __exit__(self, exc_type, exc_val, exc_tb):
    self.close()

  async def __aexit__(self, exc_type, exc_val, exc_tb):
    await self.close()


CONNECTION_ERROR_MESSAGE = 'Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download'
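With `__exit__` and `__aexit__` defined on `BaseClient` (and the default `__enter__`/`__aenter__` coming from the abstract base classes), both clients can be used as context managers so the underlying httpx client is closed automatically; a brief usage sketch (model name illustrative):

```python
import asyncio

from ollama import AsyncClient, Client

# Synchronous client: Client.close() is called on exit
with Client() as client:
  client.chat(model='gemma3', messages=[{'role': 'user', 'content': 'hi'}])


# Asynchronous client: AsyncClient.close() is awaited on exit
async def main() -> None:
  async with AsyncClient() as client:
    await client.chat(model='gemma3', messages=[{'role': 'user', 'content': 'hi'}])


asyncio.run(main())
```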
@@ -124,6 +131,9 @@ class Client(BaseClient):
  def __init__(self, host: Optional[str] = None, **kwargs) -> None:
    super().__init__(httpx.Client, host, **kwargs)

  def close(self):
    self._client.close()

  def _request_raw(self, *args, **kwargs):
    try:
      r = self._client.request(*args, **kwargs)
@@ -200,11 +210,16 @@
    context: Optional[Sequence[int]] = None,
    stream: Literal[False] = False,
    think: Optional[bool] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    raw: bool = False,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    images: Optional[Sequence[Union[str, bytes, Image]]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    steps: Optional[int] = None,
  ) -> GenerateResponse: ...

  @overload

@@ -219,11 +234,16 @@
    context: Optional[Sequence[int]] = None,
    stream: Literal[True] = True,
    think: Optional[bool] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    raw: bool = False,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    images: Optional[Sequence[Union[str, bytes, Image]]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    steps: Optional[int] = None,
  ) -> Iterator[GenerateResponse]: ...

  def generate(

@@ -237,11 +257,16 @@
    context: Optional[Sequence[int]] = None,
    stream: bool = False,
    think: Optional[bool] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    raw: Optional[bool] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    images: Optional[Sequence[Union[str, bytes, Image]]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    steps: Optional[int] = None,
  ) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
    """
    Create a response using the requested model.

@@ -266,11 +291,16 @@
        context=context,
        stream=stream,
        think=think,
        logprobs=logprobs,
        top_logprobs=top_logprobs,
        raw=raw,
        format=format,
        images=list(_copy_images(images)) if images else None,
        options=options,
        keep_alive=keep_alive,
        width=width,
        height=height,
        steps=steps,
      ).model_dump(exclude_none=True),
      stream=stream,
    )
@@ -284,6 +314,8 @@
    tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
    stream: Literal[False] = False,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,

@@ -298,6 +330,8 @@
    tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
    stream: Literal[True] = True,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,

@@ -311,6 +345,8 @@
    tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
    stream: bool = False,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,

@@ -358,6 +394,8 @@
        tools=list(_copy_tools(tools)),
        stream=stream,
        think=think,
        logprobs=logprobs,
        top_logprobs=top_logprobs,
        format=format,
        options=options,
        keep_alive=keep_alive,
@@ -686,6 +724,9 @@ class AsyncClient(BaseClient):
  def __init__(self, host: Optional[str] = None, **kwargs) -> None:
    super().__init__(httpx.AsyncClient, host, **kwargs)

  async def close(self):
    await self._client.aclose()

  async def _request_raw(self, *args, **kwargs):
    try:
      r = await self._client.request(*args, **kwargs)
@@ -802,11 +843,16 @@
    context: Optional[Sequence[int]] = None,
    stream: Literal[False] = False,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    raw: bool = False,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    images: Optional[Sequence[Union[str, bytes, Image]]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    steps: Optional[int] = None,
  ) -> GenerateResponse: ...

  @overload

@@ -821,11 +867,16 @@
    context: Optional[Sequence[int]] = None,
    stream: Literal[True] = True,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    raw: bool = False,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    images: Optional[Sequence[Union[str, bytes, Image]]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    steps: Optional[int] = None,
  ) -> AsyncIterator[GenerateResponse]: ...

  async def generate(

@@ -839,11 +890,16 @@
    context: Optional[Sequence[int]] = None,
    stream: bool = False,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    raw: Optional[bool] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    images: Optional[Sequence[Union[str, bytes, Image]]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    steps: Optional[int] = None,
  ) -> Union[GenerateResponse, AsyncIterator[GenerateResponse]]:
    """
    Create a response using the requested model.

@@ -867,11 +923,16 @@
        context=context,
        stream=stream,
        think=think,
        logprobs=logprobs,
        top_logprobs=top_logprobs,
        raw=raw,
        format=format,
        images=list(_copy_images(images)) if images else None,
        options=options,
        keep_alive=keep_alive,
        width=width,
        height=height,
        steps=steps,
      ).model_dump(exclude_none=True),
      stream=stream,
    )
@@ -885,6 +946,8 @@
    tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
    stream: Literal[False] = False,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,

@@ -899,6 +962,8 @@
    tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
    stream: Literal[True] = True,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,

@@ -912,6 +977,8 @@
    tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
    stream: bool = False,
    think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,

@@ -960,6 +1027,8 @@
        tools=list(_copy_tools(tools)),
        stream=stream,
        think=think,
        logprobs=logprobs,
        top_logprobs=top_logprobs,
        format=format,
        options=options,
        keep_alive=keep_alive,
@@ -210,6 +210,22 @@ class GenerateRequest(BaseGenerateRequest):
  think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None
  'Enable thinking mode (for thinking models).'

  logprobs: Optional[bool] = None
  'Return log probabilities for generated tokens.'

  top_logprobs: Optional[int] = None
  'Number of alternative tokens and log probabilities to include per position (0-20).'

  # Experimental image generation parameters
  width: Optional[int] = None
  'Width of the generated image in pixels (for image generation models).'

  height: Optional[int] = None
  'Height of the generated image in pixels (for image generation models).'

  steps: Optional[int] = None
  'Number of diffusion steps (for image generation models).'


class BaseGenerateResponse(SubscriptableBaseModel):
  model: Optional[str] = None
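For orientation, a sketch of how these new request fields surface in a call; the model name and sizes are illustrative (compare the generate-image example and the tests later in this diff):

```python
from ollama import Client

client = Client()
# width, height and steps are only meaningful for image generation models
response = client.generate(
  model='x/z-image-turbo',
  prompt='a sunset over mountains',
  width=1024,
  height=768,
  steps=20,
)
print('got image:', bool(response.image))
```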
@@ -243,12 +259,25 @@ class BaseGenerateResponse(SubscriptableBaseModel):
  'Duration of evaluating inference in nanoseconds.'


class TokenLogprob(SubscriptableBaseModel):
  token: str
  'Token text.'

  logprob: float
  'Log probability for the token.'


class Logprob(TokenLogprob):
  top_logprobs: Optional[Sequence[TokenLogprob]] = None
  'Most likely tokens and their log probabilities.'


class GenerateResponse(BaseGenerateResponse):
  """
  Response returned by generate requests.
  """

-  response: str
+  response: Optional[str] = None
  'Response content. When streaming, this contains a fragment of the response.'

  thinking: Optional[str] = None
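These models also make logprobs readable as typed attributes on a response; a small usage sketch (model name illustrative; dict-style access as in the examples works as well):

```python
from ollama import Client

client = Client()
response = client.chat(
  model='gemma3',
  messages=[{'role': 'user', 'content': 'hi! be concise.'}],
  logprobs=True,
  top_logprobs=3,
)
# Each entry is a Logprob; top_logprobs holds alternative TokenLogprob candidates
for lp in response.logprobs or []:
  print(lp.token, lp.logprob)
  for alt in lp.top_logprobs or []:
    print('  alt:', alt.token, alt.logprob)
```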
@@ -257,6 +286,20 @@ class GenerateResponse(BaseGenerateResponse):
  context: Optional[Sequence[int]] = None
  'Tokenized history up to the point of the response.'

  logprobs: Optional[Sequence[Logprob]] = None
  'Log probabilities for generated tokens.'

  # Image generation response fields
  image: Optional[str] = None
  'Base64-encoded generated image data (for image generation models).'

  # Streaming progress fields (for image generation)
  completed: Optional[int] = None
  'Number of completed steps (for image generation streaming).'

  total: Optional[int] = None
  'Total number of steps (for image generation streaming).'


class Message(SubscriptableBaseModel):
  """
|
||||
think: Optional[Union[bool, Literal['low', 'medium', 'high']]] = None
|
||||
'Enable thinking mode (for thinking models).'
|
||||
|
||||
logprobs: Optional[bool] = None
|
||||
'Return log probabilities for generated tokens.'
|
||||
|
||||
top_logprobs: Optional[int] = None
|
||||
'Number of alternative tokens and log probabilities to include per position (0-20).'
|
||||
|
||||
|
||||
class ChatResponse(BaseGenerateResponse):
|
||||
"""
|
||||
@@ -369,6 +418,9 @@ class ChatResponse(BaseGenerateResponse):
  message: Message
  'Response message.'

  logprobs: Optional[Sequence[Logprob]] = None
  'Log probabilities for generated tokens if requested.'


class EmbedRequest(BaseRequest):
  input: Union[str, Sequence[str]]
@@ -37,7 +37,7 @@ dependencies = [ 'ruff>=0.9.1' ]
config-path = 'none'

[tool.ruff]
-line-length = 999
+line-length = 320
indent-width = 2

[tool.ruff.format]
@@ -61,6 +61,44 @@ def test_client_chat(httpserver: HTTPServer):
  assert response['message']['content'] == "I don't know."


def test_client_chat_with_logprobs(httpserver: HTTPServer):
  httpserver.expect_ordered_request(
    '/api/chat',
    method='POST',
    json={
      'model': 'dummy',
      'messages': [{'role': 'user', 'content': 'Hi'}],
      'tools': [],
      'stream': False,
      'logprobs': True,
      'top_logprobs': 3,
    },
  ).respond_with_json(
    {
      'model': 'dummy',
      'message': {
        'role': 'assistant',
        'content': 'Hello',
      },
      'logprobs': [
        {
          'token': 'Hello',
          'logprob': -0.1,
          'top_logprobs': [
            {'token': 'Hello', 'logprob': -0.1},
            {'token': 'Hi', 'logprob': -1.0},
          ],
        }
      ],
    }
  )

  client = Client(httpserver.url_for('/'))
  response = client.chat('dummy', messages=[{'role': 'user', 'content': 'Hi'}], logprobs=True, top_logprobs=3)
  assert response['logprobs'][0]['token'] == 'Hello'
  assert response['logprobs'][0]['top_logprobs'][1]['token'] == 'Hi'


def test_client_chat_stream(httpserver: HTTPServer):
  def stream_handler(_: Request):
    def generate():
@@ -294,6 +332,40 @@ def test_client_generate(httpserver: HTTPServer):
  assert response['response'] == 'Because it is.'


def test_client_generate_with_logprobs(httpserver: HTTPServer):
  httpserver.expect_ordered_request(
    '/api/generate',
    method='POST',
    json={
      'model': 'dummy',
      'prompt': 'Why',
      'stream': False,
      'logprobs': True,
      'top_logprobs': 2,
    },
  ).respond_with_json(
    {
      'model': 'dummy',
      'response': 'Hello',
      'logprobs': [
        {
          'token': 'Hello',
          'logprob': -0.2,
          'top_logprobs': [
            {'token': 'Hello', 'logprob': -0.2},
            {'token': 'Hi', 'logprob': -1.5},
          ],
        }
      ],
    }
  )

  client = Client(httpserver.url_for('/'))
  response = client.generate('dummy', 'Why', logprobs=True, top_logprobs=2)
  assert response['logprobs'][0]['token'] == 'Hello'
  assert response['logprobs'][0]['top_logprobs'][1]['token'] == 'Hi'


def test_client_generate_with_image_type(httpserver: HTTPServer):
  httpserver.expect_ordered_request(
    '/api/generate',
@@ -496,6 +568,115 @@ async def test_async_client_generate_format_pydantic(httpserver: HTTPServer):
  assert response['response'] == '{"answer": "Because of Rayleigh scattering", "confidence": 0.95}'


def test_client_generate_image(httpserver: HTTPServer):
  httpserver.expect_ordered_request(
    '/api/generate',
    method='POST',
    json={
      'model': 'dummy-image',
      'prompt': 'a sunset over mountains',
      'stream': False,
      'width': 1024,
      'height': 768,
      'steps': 20,
    },
  ).respond_with_json(
    {
      'model': 'dummy-image',
      'image': PNG_BASE64,
      'done': True,
      'done_reason': 'stop',
    }
  )

  client = Client(httpserver.url_for('/'))
  response = client.generate('dummy-image', 'a sunset over mountains', width=1024, height=768, steps=20)
  assert response['model'] == 'dummy-image'
  assert response['image'] == PNG_BASE64
  assert response['done'] is True


def test_client_generate_image_stream(httpserver: HTTPServer):
  def stream_handler(_: Request):
    def generate():
      # Progress updates
      for i in range(1, 4):
        yield (
          json.dumps(
            {
              'model': 'dummy-image',
              'completed': i,
              'total': 3,
              'done': False,
            }
          )
          + '\n'
        )
      # Final response with image
      yield (
        json.dumps(
          {
            'model': 'dummy-image',
            'image': PNG_BASE64,
            'done': True,
            'done_reason': 'stop',
          }
        )
        + '\n'
      )

    return Response(generate())

  httpserver.expect_ordered_request(
    '/api/generate',
    method='POST',
    json={
      'model': 'dummy-image',
      'prompt': 'a sunset over mountains',
      'stream': True,
      'width': 512,
      'height': 512,
    },
  ).respond_with_handler(stream_handler)

  client = Client(httpserver.url_for('/'))
  response = client.generate('dummy-image', 'a sunset over mountains', stream=True, width=512, height=512)

  parts = list(response)
  # Check progress updates
  assert parts[0]['completed'] == 1
  assert parts[0]['total'] == 3
  assert parts[0]['done'] is False
  # Check final response
  assert parts[-1]['image'] == PNG_BASE64
  assert parts[-1]['done'] is True


async def test_async_client_generate_image(httpserver: HTTPServer):
  httpserver.expect_ordered_request(
    '/api/generate',
    method='POST',
    json={
      'model': 'dummy-image',
      'prompt': 'a robot painting',
      'stream': False,
      'width': 1024,
      'height': 1024,
    },
  ).respond_with_json(
    {
      'model': 'dummy-image',
      'image': PNG_BASE64,
      'done': True,
    }
  )

  client = AsyncClient(httpserver.url_for('/'))
  response = await client.generate('dummy-image', 'a robot painting', width=1024, height=1024)
  assert response['model'] == 'dummy-image'
  assert response['image'] == PNG_BASE64


def test_client_pull(httpserver: HTTPServer):
  httpserver.expect_ordered_request(
    '/api/pull',
@@ -1275,3 +1456,33 @@ def test_client_explicit_bearer_header_overrides_env(monkeypatch: pytest.MonkeyP
  client = Client(headers={'Authorization': 'Bearer explicit-token'})
  assert client._client.headers['authorization'] == 'Bearer explicit-token'
  client.web_search('override check')


def test_client_close():
  client = Client()
  client.close()
  assert client._client.is_closed


@pytest.mark.anyio
async def test_async_client_close():
  client = AsyncClient()
  await client.close()
  assert client._client.is_closed


def test_client_context_manager():
  with Client() as client:
    assert isinstance(client, Client)
    assert not client._client.is_closed

  assert client._client.is_closed


@pytest.mark.anyio
async def test_async_client_context_manager():
  async with AsyncClient() as client:
    assert isinstance(client, AsyncClient)
    assert not client._client.is_closed

  assert client._client.is_closed