The async version

Setup

from cachy import enable_cachy
enable_cachy()
from IPython.display import display,Image

Async SDK

model = models[1]
cli = AsyncAnthropic()
prompt = "I'm Jeremy"
m = mk_msg(prompt)
r = await cli.messages.create(messages=[m], model=model, max_tokens=100)
r

Hello Jeremy! Nice to meet you. How can I help you today?

  • id: msg_0197EaNqjqZtco5uSw6rYu34
  • content: [{'citations': None, 'text': 'Hello Jeremy! Nice to meet you. How can I help you today?', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 10, 'output_tokens': 18, 'server_tool_use': None, 'service_tier': 'standard'}
msgs = mk_msgs([prompt, r, "I forgot my name. Can you remind me please?"]) 
msgs
[{'role': 'user', 'content': "I'm Jeremy"},
 {'role': 'assistant',
  'content': [TextBlock(citations=None, text='Hello Jeremy! Nice to meet you. How can I help you today?', type='text')]},
 {'role': 'user', 'content': 'I forgot my name. Can you remind me please?'}]
await cli.messages.create(messages=msgs, model=model, max_tokens=200)

Of course! Your name is Jeremy.

  • id: msg_01UkN7e1xbcLnvW3ir6nLKAb
  • content: [{'citations': None, 'text': 'Of course! Your name is Jeremy.', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 42, 'output_tokens': 11, 'server_tool_use': None, 'service_tier': 'standard'}

source

AsyncClient


def AsyncClient(
    model, cli:NoneType=None, log:bool=False, cache:bool=False
):

Async Anthropic messages client.

Exported source
class AsyncClient(Client):
    def __init__(self, model, cli=None, log=False, cache=False):
        "Async Anthropic messages client."
        super().__init__(model,cli,log,cache)
        if not cli: self.c = AsyncAnthropic(default_headers={'anthropic-beta': 'prompt-caching-2024-07-31'})
c = AsyncClient(model)
c._r(r)
c.use
In: 10; Out: 18; Cache create: 0; Cache read: 0; Total Tokens: 28; Search: 0

source

AsyncClient.__call__


def __call__(
    msgs:list, # List of messages in the dialog
    sp:str='', # The system prompt
    temp:int=0, # Temperature
    maxtok:int=4096, # Maximum tokens
    maxthinktok:int=0, # Maximum thinking tokens
    prefill:str='', # Optional prefill to pass to Claude as start of its response
    stream:bool=False, # Stream response?
    stop:NoneType=None, # Stop sequence
    tools:Optional=None, # List of tools to make available to Claude
    tool_choice:Optional=None, # Optionally force use of some tool
    cb:NoneType=None, # Callback to pass result to when complete
    cli:NoneType=None, log:bool=False, cache:bool=False
):

Make an async call to Claude.

Exported source
@asave_iter
async def _astream(o, cm, prefill, cb):
    async with cm as s:
        yield prefill
        async for x in s.text_stream: yield x
        o.value = await s.get_final_message()
        await cb(o.value)
Exported source
@patch
@delegates(Client)
async def __call__(self:AsyncClient,
             msgs:list, # List of messages in the dialog
             sp='', # The system prompt
             temp=0, # Temperature
             maxtok=4096, # Maximum tokens
             maxthinktok=0, # Maximum thinking tokens
             prefill='', # Optional prefill to pass to Claude as start of its response
             stream:bool=False, # Stream response?
             stop=None, # Stop sequence
             tools:Optional[list]=None, # List of tools to make available to Claude
             tool_choice:Optional[dict]=None, # Optionally force use of some tool
             cb=None, # Callback to pass result to when complete
             **kwargs):
    "Make an async call to Claude."
    msgs,kwargs = self._precall(msgs, prefill, sp, temp, maxtok, maxthinktok, stream,
                                stop, tools, tool_choice, kwargs)
    m = self.c.messages
    f = m.stream if stream else m.create
    res = f(model=self.model, messages=msgs, **kwargs)
    async def _cb(v):
        self._log(v, prefill=prefill, msgs=msgs, **kwargs)
        if cb: await cb(v)
    if stream: return _astream(res, prefill, _cb)
    res = await res
    try: return res
    finally: await _cb(res)
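
The cb argument isn't exercised directly in the cells below, but the exported source shows how it behaves: after the internal logging callback runs, your callback is awaited with the completed Message (this is how AsyncChat hooks in its tool handling later). A minimal sketch, where on_done is just an illustrative name:

# Hypothetical callback sketch: `on_done` is a made-up name.
# It is awaited with the final Message once the client has logged it.
async def on_done(msg): print('output tokens:', msg.usage.output_tokens)

await c('Hi again', cb=on_done)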
c = AsyncClient(model, log=True)
c.use
In: 0; Out: 0; Cache create: 0; Cache read: 0; Total Tokens: 0; Search: 0
c.model = models[1]
await c('Hi')

Hello! How can I help you today?

  • id: msg_01QXCxYb2yRGsP7sia4UF71w
  • content: [{'citations': None, 'text': 'Hello! How can I help you today?', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 8, 'output_tokens': 12, 'server_tool_use': None, 'service_tier': 'standard'}
c.use
In: 8; Out: 12; Cache create: 0; Cache read: 0; Total Tokens: 20; Search: 0
q = "Very concisely, what is the meaning of life?"
pref = 'According to Douglas Adams,'
await c(q, prefill=pref)

According to Douglas Adams, it’s 42.

More seriously: Create meaning through connections, growth, and contribution to something beyond yourself.

  • id: msg_01AyipEe57GjCpju56iQtqRr
  • content: [{'citations': None, 'text': "According to Douglas Adams, it's 42.\n\nMore seriously: Create meaning through connections, growth, and contribution to something beyond yourself.", 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 24, 'output_tokens': 27, 'server_tool_use': None, 'service_tier': 'standard'}
c.use
In: 32; Out: 39; Cache create: 0; Cache read: 0; Total Tokens: 71; Search: 0
r = await c(q, prefill=pref, stream=True)
async for o in r: print(o, end='')
r.value
According to Douglas Adams,  it's 42.

More seriously: Create meaning through connections, growth, and contribution to something beyond yourself.

According to Douglas Adams, it’s 42.

More seriously: Create meaning through connections, growth, and contribution to something beyond yourself.

  • id: msg_01HwpyRuSLFi66AuuCuGSwLU
  • content: [{'citations': None, 'text': "According to Douglas Adams, it's 42.\n\nMore seriously: Create meaning through connections, growth, and contribution to something beyond yourself.", 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 24, 'output_tokens': 27, 'server_tool_use': None, 'service_tier': 'standard'}
c.use
In: 56; Out: 66; Cache create: 0; Cache read: 0; Total Tokens: 122; Search: 0
def sums(
    a:int,  # First thing to sum
    b:int=1 # Second thing to sum
) -> int: # The sum of the inputs
    "Adds a + b."
    print(f"Finding the sum of {a} and {b}")
    return a + b
a,b = 604542,6458932
pr = f"What is {a}+{b}?"
sp = "You are a summing expert."
tools=[sums]
choice = mk_tool_choice('sums')
choice
{'type': 'tool', 'name': 'sums'}
msgs = mk_msgs(pr)
r = await c(msgs, sp=sp, tools=tools, tool_choice=choice)
r

[ToolUseBlock(id='toolu_019n1R5kwrbTSmGZ1TcrU8b4', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')]

  • id: msg_01GuiEDm9UCKoZGhW9vqBAc9
  • content: [{'id': 'toolu_019n1R5kwrbTSmGZ1TcrU8b4', 'input': {'a': 604542, 'b': 6458932}, 'name': 'sums', 'type': 'tool_use'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: tool_use
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 712, 'output_tokens': 57, 'server_tool_use': None, 'service_tier': 'standard'}

source

mk_funcres_async


def mk_funcres_async(
    fc, ns
):

Given tool use block fc, get tool result, and create a tool_result response.
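
For instance, given the tool-use response r above, it can be applied to the ToolUseBlock directly. A minimal sketch, assuming the tool_result shape shown in the next example:

fc = r.content[0]                         # the ToolUseBlock requesting `sums`
await mk_funcres_async(fc, ns=globals())  # e.g. {'type': 'tool_result', 'tool_use_id': fc.id, 'content': '7063474'}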


source

mk_toolres_async


def mk_toolres_async(
    r:Mapping, # Tool use request response from Claude
    ns:Optional=None, # Namespace to search for tools
):

Create a tool_result message from response r.

tr = await mk_toolres_async(r, ns=globals())
tr
Finding the sum of 604542 and 6458932
[{'role': 'assistant',
  'content': [{'id': 'toolu_019n1R5kwrbTSmGZ1TcrU8b4',
    'input': {'a': 604542, 'b': 6458932},
    'name': 'sums',
    'type': 'tool_use'}]},
 {'role': 'user',
  'content': [{'type': 'tool_result',
    'tool_use_id': 'toolu_019n1R5kwrbTSmGZ1TcrU8b4',
    'content': '7063474'}]}]
msgs += tr
r = contents(await c(msgs, sp=sp, tools=sums))
r
'The sum of 604542 + 6458932 = **7,063,474**'

Structured Output


source

AsyncClient.structured


def structured(
    msgs:list, # List of messages in the dialog
    tools:Optional=None, # List of tools to make available to Claude
    ns:Optional=None, # Namespace to search for tools
    sp:str='', # The system prompt
    temp:int=0, # Temperature
    maxtok:int=4096, # Maximum tokens
    maxthinktok:int=0, # Maximum thinking tokens
    prefill:str='', # Optional prefill to pass to Claude as start of its response
    stream:bool=False, # Stream response?
    stop:NoneType=None, # Stop sequence
    tool_choice:Optional=None, # Optionally force use of some tool
    cb:NoneType=None, # Callback to pass result to when complete
    metadata:MetadataParam | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    service_tier:Literal['auto', 'standard_only'] | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    stop_sequences:SequenceNotStr[str] | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    system:Union[str, Iterable[TextBlockParam]] | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    temperature:float | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    thinking:ThinkingConfigParam | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    top_k:int | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    top_p:float | Omit=<anthropic.Omit object at 0x7f70a1e0e6e0>,
    extra_headers:Headers | None=None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. The extra values given here take precedence over values defined on the client or passed to this method.
    extra_query:Query | None=None, extra_body:Body | None=None,
    timeout:float | httpx.Timeout | None | NotGiven=NOT_GIVEN
):

Return the value of all tool calls (generally used for structured outputs)

await c.structured(pr, sums)
Finding the sum of 604542 and 6458932
[7063474]
c

[ToolUseBlock(id='toolu_01WRUTfxTaXDbxfar32GqnMP', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')]

Metric            Count   Cost (USD)
Input tokens      4,304     0.012912
Output tokens       342     0.005130
Cache tokens          0     0.000000
Server tool use       0     0.000000
Total             4,646    $0.018042

AsyncChat


source

AsyncChat


def AsyncChat(
    model:Optional=None, # Model to use (leave empty if passing `cli`)
    cli:Optional=None, # Client to use (leave empty if passing `model`)
    sp:str='', # Optional system prompt
    tools:Optional=None, # List of tools to make available to Claude
    temp:int=0, # Temperature
    cont_pr:Optional=None, # User prompt to continue an assistant response
    cache:bool=False, # Use Claude cache?
    hist:list=None, # Initialize history
    ns:Optional=None, # Namespace to search for tools
):

Anthropic async chat client.

Exported source
@delegates()
class AsyncChat(Chat):
    def __init__(self,
                 model:Optional[str]=None, # Model to use (leave empty if passing `cli`)
                 cli:Optional[Client]=None, # Client to use (leave empty if passing `model`)
                 **kwargs):
        "Anthropic async chat client."
        super().__init__(model, cli, **kwargs)
        if not cli: self.c = AsyncClient(model)
sp = "Always use tools if available, and calculations are requested."
chat = AsyncChat(model, sp=sp)
chat.c.use, chat.h
(In: 0; Out: 0; Cache create: 0; Cache read: 0; Total Tokens: 0; Search: 0, [])

source

AsyncChat.__call__


def __call__(
    pr:NoneType=None, # Prompt / message
    temp:NoneType=None, # Temperature
    maxtok:int=4096, # Maximum tokens
    maxthinktok:int=0, # Maximum thinking tokens
    stream:bool=False, # Stream response?
    prefill:str='', # Optional prefill to pass to Claude as start of its response
    tool_choice:Union=None, # Optionally force use of some tool
    kw:VAR_KEYWORD
):

Call self as a function.

Exported source
@patch
async def _append_pr(self:AsyncChat, pr=None):
    prev_role = nested_idx(self.h, -1, 'role') if self.h else 'assistant' # First message should be 'user' if no history
    if pr and prev_role == 'user': await self()
    self._post_pr(pr, prev_role)
Exported source
@patch
async def __call__(self:AsyncChat,
                   pr=None,  # Prompt / message
                   temp=None, # Temperature
                   maxtok=4096, # Maximum tokens
                   maxthinktok=0, # Maximum thinking tokens
                   stream=False, # Stream response?
                   prefill='', # Optional prefill to pass to Claude as start of its response
                   tool_choice:Optional[Union[str,bool,dict]]=None, # Optionally force use of some tool
                   **kw):
    if temp is None: temp=self.temp
    await self._append_pr(pr)
    async def _cb(v):
        self.last = await mk_toolres_async(v, ns=limit_ns(self.ns, self.tools, tool_choice))
        self.h += self.last
    return await self.c(self.h, stream=stream, prefill=prefill, sp=self.sp, temp=temp, maxtok=maxtok, maxthinktok=maxthinktok, tools=self.tools, tool_choice=tool_choice, cb=_cb, **kw)
await chat("I'm Jeremy")
await chat("What's my name?")

Your name is Jeremy! You told me that at the start of our conversation.

  • id: msg_011gVfJUTyqUBreT3u9ej2xM
  • content: [{'citations': None, 'text': 'Your name is Jeremy! You told me that at the start of our conversation.', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 47, 'output_tokens': 19, 'server_tool_use': None, 'service_tier': 'standard'}
q = "Very concisely, what is the meaning of life?"
pref = 'According to Douglas Adams,'
await chat(q, prefill=pref)

According to Douglas Adams, it’s 42.

More seriously: to find purpose through connection, growth, and contributing something meaningful to others.

  • id: msg_01PEtrNFwjv7daER26fdzatn
  • content: [{'citations': None, 'text': "According to Douglas Adams, it's 42. \n\nMore seriously: to find purpose through connection, growth, and contributing something meaningful to others.", 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 86, 'output_tokens': 29, 'server_tool_use': None, 'service_tier': 'standard'}
chat = AsyncChat(model, sp=sp)
r = await chat("I'm Jeremy", stream=True)
async for o in r: print(o, end='')
r.value
Hello Jeremy! Nice to meet you. How can I help you today?

Hello Jeremy! Nice to meet you. How can I help you today?

  • id: msg_01C3ZDw8mwZioFsp4Xm8qPSx
  • content: [{'citations': None, 'text': 'Hello Jeremy! Nice to meet you. How can I help you today?', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 21, 'output_tokens': 18, 'server_tool_use': None, 'service_tier': 'standard'}
pr = f"What is {a}+{b}?"
chat = AsyncChat(model, sp=sp, tools=[sums])
r = await chat(pr)
r
Finding the sum of 604542 and 6458932

[ToolUseBlock(id='toolu_012wP54FgYSiSiQ2Sy6ivNtD', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')]

  • id: msg_01CsTZMd2xg82kqTz6N6Ui4t
  • content: [{'id': 'toolu_012wP54FgYSiSiQ2Sy6ivNtD', 'input': {'a': 604542, 'b': 6458932}, 'name': 'sums', 'type': 'tool_use'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: tool_use
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 624, 'output_tokens': 72, 'server_tool_use': None, 'service_tier': 'standard'}
await chat()

The sum of 604542 + 6458932 = 7,063,474

  • id: msg_01HGhwCoQN4sbWZimSbky4ck
  • content: [{'citations': None, 'text': 'The sum of 604542 + 6458932 = **7,063,474**', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 712, 'output_tokens': 24, 'server_tool_use': None, 'service_tier': 'standard'}

AsyncChat handles missing tools gracefully. When a tool is called but not found in the namespace, it returns an error message rather than crashing.

In this test, we intentionally empty the namespace (chat.ns={}) to simulate what would happen if Claude hallucinated a tool or if a tool was missing. The limit_ns function (used in the _cb callback) would normally filter out hallucinated tools, but here we’re testing the fallback behavior when tools aren’t available.

pr = f"What is {a}+{b}?"
chat = AsyncChat(model, sp=sp, tools=[sums])
chat.ns={}
r = await chat(pr)
r

[ToolUseBlock(id='toolu_012wP54FgYSiSiQ2Sy6ivNtD', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')]

  • id: msg_01CsTZMd2xg82kqTz6N6Ui4t
  • content: [{'id': 'toolu_012wP54FgYSiSiQ2Sy6ivNtD', 'input': {'a': 604542, 'b': 6458932}, 'name': 'sums', 'type': 'tool_use'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: tool_use
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 624, 'output_tokens': 72, 'server_tool_use': None, 'service_tier': 'standard'}
chat.h
[{'role': 'user', 'content': 'What is 604542+6458932?'},
 {'role': 'assistant',
  'content': [{'id': 'toolu_012wP54FgYSiSiQ2Sy6ivNtD',
    'input': {'a': 604542, 'b': 6458932},
    'name': 'sums',
    'type': 'tool_use'}]},
 {'role': 'user',
  'content': [{'type': 'tool_result',
    'tool_use_id': 'toolu_012wP54FgYSiSiQ2Sy6ivNtD',
    'content': 'Error - tool not defined in the tool_schemas: sums'}]}]
fn = Path('samples/puppy.jpg')
img = fn.read_bytes()
Image(img)

q = "In brief, what color flowers are in this image?"
msg = mk_msg([img, q])
await c([msg])

The flowers in this image are purple.

  • id: msg_01JPnMvi1UmuspTeQYG8RQiC
  • content: [{'citations': None, 'text': 'The flowers in this image are **purple**.', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 110, 'output_tokens': 12, 'server_tool_use': None, 'service_tier': 'standard'}

Add cache=True to automatically add the conversation so far to Claude's KV cache.

chat = AsyncChat(model, sp=sp, cache=True)
await chat("Lorem ipsum dolor sit amet" * 150)

I notice you’ve sent me the phrase “Lorem ipsum dolor sit amet” repeated many times. “Lorem ipsum” is placeholder text commonly used in design and publishing to demonstrate visual form without meaningful content.

Is there something specific I can help you with? For example: - Do you have a question or task you’d like assistance with? - Were you testing something? - Did you mean to send different content?

I’m here to help with a wide range of tasks including answering questions, analysis, writing, problem-solving, and more. Please let me know what you need!

  • id: msg_015it5oCQhxJAYuZzSqag6DC
  • content: [{'citations': None, 'text': 'I notice you\'ve sent me the phrase "Lorem ipsum dolor sit amet" repeated many times. "Lorem ipsum" is placeholder text commonly used in design and publishing to demonstrate visual form without meaningful content.\n\nIs there something specific I can help you with? For example:\n- Do you have a question or task you\'d like assistance with?\n- Were you testing something?\n- Did you mean to send different content?\n\nI\'m here to help with a wide range of tasks including answering questions, analysis, writing, problem-solving, and more. Please let me know what you need!', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 1063}, 'cache_creation_input_tokens': 1063, 'cache_read_input_tokens': 0, 'input_tokens': 3, 'output_tokens': 125, 'server_tool_use': None, 'service_tier': 'standard'}
chat.use
In: 3; Out: 125; Cache create: 1063; Cache read: 0; Total Tokens: 1191; Search: 0

In this followup call, nearly all the tokens are cached, so only the new tokens are charged at the full rate.

await chat("Whoops, sorry about that!")

No problem at all! These things happen. 😊

How can I help you today? Feel free to ask me anything or let me know what you’re working on!

  • id: msg_01GmADYrSiU3m84ACDf3Wvr6
  • content: [{'citations': None, 'text': "No problem at all! These things happen. 😊\n\nHow can I help you today? Feel free to ask me anything or let me know what you're working on!", 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 1199}, 'cache_creation_input_tokens': 1199, 'cache_read_input_tokens': 0, 'input_tokens': 3, 'output_tokens': 39, 'server_tool_use': None, 'service_tier': 'standard'}
chat.use
In: 6; Out: 164; Cache create: 2262; Cache read: 0; Total Tokens: 2432; Search: 0

Extended Thinking

Let’s call the model without extended thinking enabled.

chat = AsyncChat(model)
await chat("Write a sentence about Python!")

Python is a versatile, high-level programming language known for its clean syntax and readability, making it popular for everything from web development to data science and machine learning.

  • id: msg_013GJfKGhDZwj9n9cNEoYRW5
  • content: [{'citations': None, 'text': 'Python is a versatile, high-level programming language known for its clean syntax and readability, making it popular for everything from web development to data science and machine learning.', 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 13, 'output_tokens': 38, 'server_tool_use': None, 'service_tier': 'standard'}

Now, let’s call the model with extended thinking enabled.

r = await chat("Write a sentence about Python!", maxthinktok=1024)
r

Python’s extensive standard library and vast ecosystem of third-party packages make it an excellent choice for rapidly developing applications across diverse domains.

Thinking: The user is asking me to write another sentence about Python. I should provide a different sentence than before to keep it interesting and informative.
  • id: msg_01Y4N6eR7socsGLeVJs6oapd
  • content: [{'signature': 'EsACCkYIChgCKkC+YytasVsIu50+vbFqeiRvPJF8hAUKM6cBDM1n6UwzdeHo6ueIax7YzrcBwYyQeyaxRYBA9oGmIB7Om5+cQNQIEgzUO2dJLD+Q4RfuICwaDLed+ZrwJ+ghnB9lpCIw8LzGix1ZyAUUBgu7Z7IBBsrAHi9JLu8IazIGFQ7dlaNg0xCx1TruTDOtO/ne4BkMKqcBeC/gMvJu56gzQqnlyJmhAOU4YWKjoI6APmdNhe9lTLBMKlWcwGBqoyFSJqLdk5UTQQTrz94JQddkHAYCuYW8XfPcu75puFzn2xi/ulu37N8FuZk4gDzFImVNteeMz9FST29vsVywdM85Pt1H3n7EMS0EhkT5XZOhbrkZmKjsA4a9FdmOJAIEBbD6IxFuE5FV+gMcvO5aoY3X8bteAM6KHEyyzYfje8UYAQ==', 'thinking': 'The user is asking me to write another sentence about Python. I should provide a different sentence than before to keep it interesting and informative.', 'type': 'thinking'}, {'citations': None, 'text': "Python's extensive standard library and vast ecosystem of third-party packages make it an excellent choice for rapidly developing applications across diverse domains.", 'type': 'text'}]
  • model: claude-sonnet-4-5-20250929
  • role: assistant
  • stop_reason: end_turn
  • stop_sequence: None
  • type: message
  • usage: {'cache_creation': {'ephemeral_1h_input_tokens': 0, 'ephemeral_5m_input_tokens': 0}, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 89, 'output_tokens': 65, 'server_tool_use': None, 'service_tier': 'standard'}
r.content
[ThinkingBlock(signature='EsACCkYIChgCKkC+YytasVsIu50+vbFqeiRvPJF8hAUKM6cBDM1n6UwzdeHo6ueIax7YzrcBwYyQeyaxRYBA9oGmIB7Om5+cQNQIEgzUO2dJLD+Q4RfuICwaDLed+ZrwJ+ghnB9lpCIw8LzGix1ZyAUUBgu7Z7IBBsrAHi9JLu8IazIGFQ7dlaNg0xCx1TruTDOtO/ne4BkMKqcBeC/gMvJu56gzQqnlyJmhAOU4YWKjoI6APmdNhe9lTLBMKlWcwGBqoyFSJqLdk5UTQQTrz94JQddkHAYCuYW8XfPcu75puFzn2xi/ulu37N8FuZk4gDzFImVNteeMz9FST29vsVywdM85Pt1H3n7EMS0EhkT5XZOhbrkZmKjsA4a9FdmOJAIEBbD6IxFuE5FV+gMcvO5aoY3X8bteAM6KHEyyzYfje8UYAQ==', thinking='The user is asking me to write another sentence about Python. I should provide a different sentence than before to keep it interesting and informative.', type='thinking'),
 TextBlock(citations=None, text="Python's extensive standard library and vast ecosystem of third-party packages make it an excellent choice for rapidly developing applications across diverse domains.", type='text')]