Skip to content

Commit 3651c2e

Browse files
committed
improve tool calling outside of reasoning blocks, improve code interpreter documentation around async
1 parent 3a2e9b1 commit 3651c2e

File tree

5 files changed

+195
-78
lines changed

5 files changed

+195
-78
lines changed

tools/server/public/index.html.gz

533 Bytes
Binary file not shown.

tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Lines changed: 91 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,14 @@
2525
import { SvelteSet } from 'svelte/reactivity';
2626
2727
type ToolSegment =
28+
| { kind: 'content'; content: string; parentId: string }
2829
| { kind: 'thinking'; content: string }
29-
| { kind: 'tool'; toolCalls: ApiChatCompletionToolCall[]; parentId: string };
30+
| {
31+
kind: 'tool';
32+
toolCalls: ApiChatCompletionToolCall[];
33+
parentId: string;
34+
inThinking: boolean;
35+
};
3036
type ToolParsed = { expression?: string; result?: string; duration_ms?: number };
3137
type CollectedToolMessage = {
3238
toolCallId?: string | null;
@@ -115,6 +121,11 @@
115121
toolMessagesCollectedProp ?? (message as MessageWithToolExtras)._toolMessagesCollected ?? null
116122
);
117123
124+
let hasRegularContent = $derived.by(() => {
125+
if (messageContent?.trim()) return true;
126+
return (segments ?? []).some((s) => s.kind === 'content' && Boolean(s.content?.trim()));
127+
});
128+
118129
const toolCalls = $derived(
119130
Array.isArray(toolCallContent) ? (toolCallContent as ApiChatCompletionToolCall[]) : null
120131
);
@@ -265,6 +276,14 @@
265276
if (name === 'code_interpreter_javascript') return 'Code Interpreter (JavaScript)';
266277
return name || `Call #${index + 1}`;
267278
}
279+
280+
function segmentToolInThinking(segment: ToolSegment): boolean {
281+
if (segment.kind !== 'tool') return false;
282+
const maybe = segment as unknown as { inThinking?: unknown };
283+
if (typeof maybe.inThinking === 'boolean') return maybe.inThinking;
284+
// Back-compat fallback: if we don't know, treat as in-reasoning when there is a thinking block.
285+
return Boolean(thinkingContent);
286+
}
268287
</script>
269288

270289
<div
@@ -276,15 +295,15 @@
276295
<ChatMessageThinkingBlock
277296
reasoningContent={segments && segments.length ? null : thinkingContent}
278297
isStreaming={!message.timestamp || isLoading()}
279-
hasRegularContent={!!messageContent?.trim()}
298+
{hasRegularContent}
280299
>
281300
{#if segments && segments.length}
282301
{#each segments as segment, segIndex (segIndex)}
283302
{#if segment.kind === 'thinking'}
284303
<div class="text-xs leading-relaxed break-words whitespace-pre-wrap">
285304
{segment.content}
286305
</div>
287-
{:else if segment.kind === 'tool'}
306+
{:else if segment.kind === 'tool' && segmentToolInThinking(segment)}
288307
{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
289308
{@const argsParsed = parseArguments(toolCall)}
290309
{@const parsed = advanceToolResult(toolCall)}
@@ -354,75 +373,6 @@
354373
</ChatMessageThinkingBlock>
355374
{/if}
356375

357-
{#if !thinkingContent && segments && segments.length}
358-
{#each segments as segment, segIndex (segIndex)}
359-
{#if segment.kind === 'tool'}
360-
{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
361-
{@const argsParsed = parseArguments(toolCall)}
362-
{@const parsed = advanceToolResult(toolCall)}
363-
{@const collectedResult = toolMessagesCollected
364-
? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.result
365-
: undefined}
366-
{@const collectedDurationMs = toolMessagesCollected
367-
? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.duration_ms
368-
: undefined}
369-
{@const durationMs = parsed?.duration_ms ?? collectedDurationMs}
370-
{@const durationText = formatDurationSeconds(durationMs)}
371-
<div
372-
class="mt-2 space-y-1 rounded-md border border-dashed border-muted-foreground/40 bg-muted/40 px-2.5 py-2"
373-
data-testid="tool-call-block"
374-
>
375-
<div class="flex items-center justify-between gap-2">
376-
<div class="flex items-center gap-1 text-xs font-semibold">
377-
<Wrench class="h-3.5 w-3.5" />
378-
<span>{getToolLabel(toolCall, index)}</span>
379-
</div>
380-
{#if durationText}
381-
<BadgeChatStatistic icon={Clock} value={durationText} />
382-
{/if}
383-
</div>
384-
{#if argsParsed}
385-
<div class="text-[12px] text-muted-foreground">Arguments</div>
386-
{#if 'pairs' in argsParsed}
387-
{#each argsParsed.pairs as pair (pair.key)}
388-
<div class="mt-1 rounded-sm bg-background/70 px-2 py-1.5">
389-
<div class="text-[12px] font-semibold text-foreground">{pair.key}</div>
390-
{#if pair.key === 'code' && toolCall.function?.name === 'code_interpreter_javascript'}
391-
<MarkdownContent
392-
class="mt-0.5 text-[12px] leading-snug"
393-
content={toFencedCodeBlock(pair.value, 'javascript')}
394-
/>
395-
{:else}
396-
<pre
397-
class="mt-0.5 font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
398-
{pair.value}
399-
</pre>
400-
{/if}
401-
</div>
402-
{/each}
403-
{:else}
404-
<pre class="font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
405-
{argsParsed.raw}
406-
</pre>
407-
{/if}
408-
{/if}
409-
{#if parsed && parsed.result !== undefined}
410-
<div class="text-[12px] text-muted-foreground">Result</div>
411-
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
412-
{parsed.result}
413-
</div>
414-
{:else if collectedResult !== undefined}
415-
<div class="text-[12px] text-muted-foreground">Result</div>
416-
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
417-
{collectedResult}
418-
</div>
419-
{/if}
420-
</div>
421-
{/each}
422-
{/if}
423-
{/each}
424-
{/if}
425-
426376
{#if message?.role === 'assistant' && isLoading() && !message?.content?.trim()}
427377
<div class="mt-6 w-full max-w-[48rem]" in:fade>
428378
<div class="processing-container">
@@ -474,6 +424,75 @@
474424
{:else if message.role === 'assistant'}
475425
{#if config().disableReasoningFormat}
476426
<pre class="raw-output">{messageContent}</pre>
427+
{:else if segments && segments.length}
428+
{#each segments as segment, segIndex (segIndex)}
429+
{#if segment.kind === 'content'}
430+
<MarkdownContent content={segment.content ?? ''} />
431+
{:else if segment.kind === 'tool' && (!thinkingContent || !segmentToolInThinking(segment))}
432+
{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
433+
{@const argsParsed = parseArguments(toolCall)}
434+
{@const parsed = advanceToolResult(toolCall)}
435+
{@const collectedResult = toolMessagesCollected
436+
? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.result
437+
: undefined}
438+
{@const collectedDurationMs = toolMessagesCollected
439+
? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.duration_ms
440+
: undefined}
441+
{@const durationMs = parsed?.duration_ms ?? collectedDurationMs}
442+
{@const durationText = formatDurationSeconds(durationMs)}
443+
<div
444+
class="mt-2 space-y-1 rounded-md border border-dashed border-muted-foreground/40 bg-muted/40 px-2.5 py-2"
445+
data-testid="tool-call-block"
446+
>
447+
<div class="flex items-center justify-between gap-2">
448+
<div class="flex items-center gap-1 text-xs font-semibold">
449+
<Wrench class="h-3.5 w-3.5" />
450+
<span>{getToolLabel(toolCall, index)}</span>
451+
</div>
452+
{#if durationText}
453+
<BadgeChatStatistic icon={Clock} value={durationText} />
454+
{/if}
455+
</div>
456+
{#if argsParsed}
457+
<div class="text-[12px] text-muted-foreground">Arguments</div>
458+
{#if 'pairs' in argsParsed}
459+
{#each argsParsed.pairs as pair (pair.key)}
460+
<div class="mt-1 rounded-sm bg-background/70 px-2 py-1.5">
461+
<div class="text-[12px] font-semibold text-foreground">{pair.key}</div>
462+
{#if pair.key === 'code' && toolCall.function?.name === 'code_interpreter_javascript'}
463+
<MarkdownContent
464+
class="mt-0.5 text-[12px] leading-snug"
465+
content={toFencedCodeBlock(pair.value, 'javascript')}
466+
/>
467+
{:else}
468+
<pre
469+
class="mt-0.5 font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
470+
{pair.value}
471+
</pre>
472+
{/if}
473+
</div>
474+
{/each}
475+
{:else}
476+
<pre class="font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
477+
{argsParsed.raw}
478+
</pre>
479+
{/if}
480+
{/if}
481+
{#if parsed && parsed.result !== undefined}
482+
<div class="text-[12px] text-muted-foreground">Result</div>
483+
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
484+
{parsed.result}
485+
</div>
486+
{:else if collectedResult !== undefined}
487+
<div class="text-[12px] text-muted-foreground">Result</div>
488+
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
489+
{collectedResult}
490+
</div>
491+
{/if}
492+
</div>
493+
{/each}
494+
{/if}
495+
{/each}
477496
{:else}
478497
<MarkdownContent content={messageContent ?? ''} />
479498
{/if}

tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,14 @@
5050
});
5151
5252
type ToolSegment =
53+
| { kind: 'content'; content: string; parentId: string }
5354
| { kind: 'thinking'; content: string }
54-
| { kind: 'tool'; toolCalls: ApiChatCompletionToolCall[]; parentId: string };
55+
| {
56+
kind: 'tool';
57+
toolCalls: ApiChatCompletionToolCall[];
58+
parentId: string;
59+
inThinking: boolean;
60+
};
5561
type CollectedToolMessage = {
5662
toolCallId?: string | null;
5763
parsed: { expression?: string; result?: string; duration_ms?: number };
@@ -161,6 +167,7 @@
161167
// Collapse consecutive assistant/tool chains into one display message
162168
const toolParentIds: string[] = [];
163169
const thinkingParts: string[] = [];
170+
const contentParts: string[] = [];
164171
const toolCallsCombined: ApiChatCompletionToolCall[] = [];
165172
const segments: ToolSegment[] = [];
166173
const toolMessagesCollected: CollectedToolMessage[] = [];
@@ -176,6 +183,16 @@
176183
thinkingParts.push(currentAssistant.thinking);
177184
segments.push({ kind: 'thinking', content: currentAssistant.thinking });
178185
}
186+
187+
const hasContent = Boolean(currentAssistant.content?.trim());
188+
if (hasContent) {
189+
contentParts.push(currentAssistant.content);
190+
segments.push({
191+
kind: 'content',
192+
content: currentAssistant.content,
193+
parentId: currentAssistant.id
194+
});
195+
}
179196
let thisAssistantToolCalls: ApiChatCompletionToolCall[] = [];
180197
if (currentAssistant.toolCalls) {
181198
try {
@@ -196,7 +213,10 @@
196213
segments.push({
197214
kind: 'tool',
198215
toolCalls: thisAssistantToolCalls,
199-
parentId: currentAssistant.id
216+
parentId: currentAssistant.id,
217+
// Heuristic: only treat tool calls as "in reasoning" when the assistant hasn't
218+
// started emitting user-visible content yet.
219+
inThinking: Boolean(currentAssistant.thinking) && !hasContent
200220
});
201221
}
202222
@@ -248,7 +268,8 @@
248268
249269
const mergedAssistant: AssistantDisplayMessage = {
250270
...(currentAssistant ?? msg),
251-
content: currentAssistant?.content ?? '',
271+
// Keep a plain-text combined content for edit/copy; display can use `_segments` for ordering.
272+
content: contentParts.filter(Boolean).join('\n\n'),
252273
thinking: thinkingParts.filter(Boolean).join('\n\n'),
253274
toolCalls: toolCallsCombined.length ? JSON.stringify(toolCallsCombined) : '',
254275
...(aggregatedTimings ? { timings: aggregatedTimings } : {}),

tools/server/webui/src/lib/services/tools/codeInterpreter.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ export const codeInterpreterToolDefinition: ApiToolDefinition = {
1111
function: {
1212
name: CODE_INTERPRETER_JS_TOOL_NAME,
1313
description:
14-
'Execute JavaScript in a sandboxed environment. Returns console output and the final evaluated value.',
14+
'Execute JavaScript in a sandboxed Worker. Your code runs inside an async function (top-level await is supported). Do not wrap code in an async IIFE like (async () => { ... })() unless you return/await it, otherwise the tool may finish before async logs run. If you use promises, they must be awaited. Returns combined console output and the final evaluated value. (no output) likely indicates either an unawaited promise or that you did not output anything.',
1515
parameters: {
1616
type: 'object',
1717
properties: {
@@ -336,7 +336,7 @@ registerTool({
336336
} else if (result !== undefined) {
337337
combined += result;
338338
} else if (!combined) {
339-
combined = '(no output)';
339+
combined = '(no output, did you forget to await a top level promise?)';
340340
}
341341
return { content: combined };
342342
}

tools/server/webui/tests/client/chatMessages.tool-inline.test.ts

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ describe('ChatMessages inline tool rendering', () => {
4242

4343
// Message chain: user -> assistant(thinking+toolcall) -> tool -> assistant(thinking) -> tool -> assistant(final)
4444
const user = msg('u1', 'user', 'Question', null);
45-
const a1 = msg('a1', 'assistant', '', user.id, {
45+
const a1 = msg('a1', 'assistant', 'Let me calculate that.', user.id, {
4646
thinking: 'step1',
4747
toolCalls: JSON.stringify([
4848
{
@@ -102,5 +102,82 @@ describe('ChatMessages inline tool rendering', () => {
102102
expect(container.textContent).toContain('20.25/7.84');
103103
expect(container.textContent).toContain('1.3689');
104104
expect(container.textContent).toContain('1.23s');
105+
106+
// Content produced before the first tool call should not be lost when the chain collapses.
107+
expect(container.textContent).toContain('Let me calculate that.');
108+
});
109+
110+
it('does not render post-reasoning tool calls inside the reasoning block', async () => {
111+
settingsStore.config = {
112+
...SETTING_CONFIG_DEFAULT,
113+
enableCalculatorTool: true,
114+
showThoughtInProgress: true
115+
};
116+
117+
conversationsStore.activeConversation = {
118+
id: 'c1',
119+
name: 'Test',
120+
currNode: null,
121+
lastModified: Date.now()
122+
};
123+
124+
const user = msg('u1', 'user', 'Question', null);
125+
const a1 = msg('a1', 'assistant', 'Here is the answer (before tool).', user.id, {
126+
thinking: 'done thinking',
127+
toolCalls: JSON.stringify([
128+
{
129+
id: 'call-1',
130+
type: 'function',
131+
function: { name: 'calculator', arguments: JSON.stringify({ expression: '1+1' }) }
132+
}
133+
]),
134+
// Simulate streaming so the reasoning block is expanded and in-DOM.
135+
timestamp: 0
136+
});
137+
const t1 = msg(
138+
't1',
139+
'tool',
140+
JSON.stringify({ expression: '1+1', result: '2', duration_ms: 10 }),
141+
a1.id,
142+
{
143+
toolCallId: 'call-1'
144+
}
145+
);
146+
const a2 = msg('a2', 'assistant', 'And here is the rest (after tool).', t1.id, {
147+
timestamp: 0
148+
});
149+
150+
const messages = [user, a1, t1, a2];
151+
conversationsStore.activeMessages = messages;
152+
153+
const { container } = render(TestMessagesWrapper, {
154+
target: document.body,
155+
props: { messages }
156+
});
157+
158+
const assistant = container.querySelector('[aria-label="Assistant message with actions"]');
159+
expect(assistant).toBeTruthy();
160+
161+
// Tool call should exist overall...
162+
expect(container.querySelectorAll('[data-testid="tool-call-block"]').length).toBe(1);
163+
164+
// ...but it should not be rendered inside the reasoning collapsible content.
165+
const reasoningRoot = assistant
166+
? Array.from(assistant.querySelectorAll('[data-state]')).find((el) =>
167+
(el.textContent ?? '').includes('Reasoning')
168+
)
169+
: null;
170+
expect(reasoningRoot).toBeTruthy();
171+
expect(reasoningRoot?.querySelectorAll('[data-testid="tool-call-block"]').length ?? 0).toBe(0);
172+
173+
// Ordering: pre-tool content -> tool arguments -> post-tool content.
174+
const fullText = container.textContent ?? '';
175+
expect(fullText.indexOf('Here is the answer (before tool).')).toBeGreaterThanOrEqual(0);
176+
expect(fullText.indexOf('Arguments')).toBeGreaterThan(
177+
fullText.indexOf('Here is the answer (before tool).')
178+
);
179+
expect(fullText.indexOf('And here is the rest (after tool).')).toBeGreaterThan(
180+
fullText.indexOf('Arguments')
181+
);
105182
});
106183
});

0 commit comments

Comments
 (0)