Source code for rath.llm.chat_response

"""Frozen response types mirroring OpenAI ``chat.completion`` objects."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Literal, Mapping

# Public API of this module: the names exported by a star-import.
__all__ = [
    "RathLLMTokenUsage",
    "RathLLMToolCallFunction",
    "RathLLMToolCallPart",
    "RathLLMAssistantMessage",
    "RathLLMChatChoice",
    "RathLLMChatResponse",
    "RathLLMFinishReason",
    "RathLLMStreamDelta",
    "add_usage",
]

# Closed set of string values permitted wherever this alias is used as an
# annotation; enforced by static type checkers only, not at runtime.
RathLLMFinishReason = Literal[
    "stop", "length", "tool_calls", "content_filter", "function_call"
]


[docs] @dataclass(frozen=True, slots=True) class RathLLMTokenUsage: """Token counts from ``usage``; optional detail dicts stay JSON-shaped.""" prompt_tokens: int completion_tokens: int total_tokens: int completion_tokens_details: Mapping[str, Any] | None = None prompt_tokens_details: Mapping[str, Any] | None = None
[docs] @dataclass(frozen=True, slots=True) class RathLLMToolCallFunction: """``function`` payload inside a tool call (name + arguments string).""" name: str arguments: str arguments_parsed: dict[str, Any] | None arguments_parse_error: bool
[docs] @dataclass(frozen=True, slots=True) class RathLLMToolCallPart: """One entry from ``message.tool_calls``.""" id: str type: str function: RathLLMToolCallFunction
[docs] @dataclass(frozen=True, slots=True) class RathLLMAssistantMessage: """Assistant message on a choice (content, optional tool calls, provider extras).""" role: Literal["assistant"] = "assistant" content: str | None = None refusal: str | None = None reasoning_content: str | None = None tool_calls: tuple[RathLLMToolCallPart, ...] | None = None function_call: Mapping[str, Any] | None = None annotations: tuple[Mapping[str, Any], ...] | None = None
[docs] @dataclass(frozen=True, slots=True) class RathLLMChatChoice: """One element of ``choices``.""" index: int finish_reason: RathLLMFinishReason message: RathLLMAssistantMessage logprobs: Mapping[str, Any] | None = None
[docs] @dataclass(frozen=True, slots=True) class RathLLMChatResponse: """Normalized non-streaming ``ChatCompletion``.""" id: str choices: tuple[RathLLMChatChoice, ...] created: int model: str object_type: Literal["chat.completion"] = "chat.completion" service_tier: str | None = None system_fingerprint: str | None = None usage: RathLLMTokenUsage | None = None raw: Mapping[str, Any] | None = None @property def primary_choice(self) -> RathLLMChatChoice: """The first choice (typical when ``n`` is 1).""" if not self.choices: raise IndexError("RathLLMChatResponse has no choices") return self.choices[0]
[docs] @dataclass(frozen=True, slots=True) class RathLLMStreamDelta: """One chunk emitted by a streaming completion. Fields are independent and any subset may be populated: - ``content_delta`` carries an assistant text fragment. - ``tool_call_index`` / ``tool_call_id`` / ``tool_call_name_delta`` / ``tool_call_args_delta`` extend an in-progress assistant tool_call. Multiple tool calls in one stream are distinguished by ``tool_call_index``. - ``finish_reason`` is set on the terminal chunk for a choice. - ``usage`` is populated only on the final stream event (and only when the underlying API agreed to report it, e.g. OpenAI's ``stream_options={"include_usage": True}``). """ content_delta: str | None = None tool_call_index: int | None = None tool_call_id: str | None = None tool_call_name_delta: str | None = None tool_call_args_delta: str | None = None finish_reason: RathLLMFinishReason | None = None usage: RathLLMTokenUsage | None = None
def add_usage(
    a: RathLLMTokenUsage | None,
    b: RathLLMTokenUsage | None,
) -> RathLLMTokenUsage | None:
    """Sum two token usages.

    Returns ``None`` only when both inputs are ``None`` (so callers can
    detect that no provider in the chain reported usage). When exactly one
    input is ``None`` the other one is returned as-is - the same object,
    with whatever detail dicts it carries.

    Detail dicts are not merged - they are dropped on the accumulated total
    because per-call breakdowns don't sum cleanly.

    Args:
        a: First usage, or ``None``.
        b: Second usage, or ``None``.

    Returns:
        ``None``, one of the inputs unchanged, or a new ``RathLLMTokenUsage``
        whose three counts are the field-wise sums and whose detail dicts
        are left at their ``None`` defaults.
    """
    # Nothing to add on one side: hand back the other side untouched (this
    # also yields None when both are None).
    if a is None:
        return b
    if b is None:
        return a
    return RathLLMTokenUsage(
        prompt_tokens=a.prompt_tokens + b.prompt_tokens,
        completion_tokens=a.completion_tokens + b.completion_tokens,
        total_tokens=a.total_tokens + b.total_tokens,
    )