From a7c1bda07cfda0b6a2068dea1b88beb88de462ba Mon Sep 17 00:00:00 2001
From: c4ch3c4d3 <23181631+c4ch3c4d3@users.noreply.github.com>
Date: Mon, 27 Apr 2026 10:58:13 -0600
Subject: [PATCH] Add configurable token limit and truncation warning to Lab 1
 confidence chat

---
 src/app/api/lab1/chat/route.ts             | 22 ++++++---
 .../labs/Lab1ConfidenceChat.test.tsx       | 45 +++++++++++++++++++
 src/components/labs/Lab1ConfidenceChat.tsx |  9 ++++
 src/lib/lab1-confidence.test.ts            | 24 ++++++++++
 src/lib/lab1-confidence.ts                 | 27 ++++++++++-
 5 files changed, 121 insertions(+), 6 deletions(-)

diff --git a/src/app/api/lab1/chat/route.ts b/src/app/api/lab1/chat/route.ts
index 5bafdf1..14a5638 100644
--- a/src/app/api/lab1/chat/route.ts
+++ b/src/app/api/lab1/chat/route.ts
@@ -4,11 +4,12 @@ import { normalizeUpstreamChatEndpoint } from "~/lib/lab2-chat";
 import {
   clampLab1Messages,
   extractLab1AssistantContent,
+  extractLab1FinishReason,
   extractLab1ResponseTokens,
   getLab1SystemPrompt,
   LAB1_CONFIDENCE_MODEL_ALIAS,
-  LAB1_DEFAULT_MAX_TOKENS,
   LAB1_DEFAULT_TEMPERATURE,
+  parseLab1MaxTokens,
   type Lab1ConfidenceMessage,
 } from "~/lib/lab1-confidence";
 
@@ -32,6 +33,10 @@ function getLab1ModelAlias() {
   );
 }
 
+function getLab1MaxTokens() {
+  return parseLab1MaxTokens(process.env.COURSEWARE_LAB1_MAX_TOKENS?.trim());
+}
+
 export async function POST(request: Request) {
   let body: ChatRouteRequestBody;
 
@@ -62,10 +67,11 @@ export async function POST(request: Request) {
   );
 
   try {
+    const maxTokens = getLab1MaxTokens();
     const upstreamResponse = await fetch(getLocalOllamaEndpoint(), {
       body: JSON.stringify({
         logprobs: true,
-        max_tokens: LAB1_DEFAULT_MAX_TOKENS,
+        max_tokens: maxTokens,
         messages: [
           {
             content: getLab1SystemPrompt(),
@@ -131,13 +137,18 @@ export async function POST(request: Request) {
     const content =
       extractLab1AssistantContent(parsedBody) ||
       tokens.map((token) => token.token).join("");
+    const finishReason = extractLab1FinishReason(parsedBody);
+    const isTruncated = finishReason === "length";
 
     return NextResponse.json({
       content,
+      finishReason,
+      isTruncated,
+      maxTokens,
       model:
-        ("model" in parsedBody && typeof parsedBody.model === "string"
+        "model" in parsedBody && typeof parsedBody.model === "string"
           ? parsedBody.model
-          : getLab1ModelAlias()),
+          : getLab1ModelAlias(),
       role: "assistant",
       tokens,
     });
@@ -153,7 +164,8 @@ export async function POST(request: Request) {
 
     return NextResponse.json(
       {
-        error: "The Lab 1 confidence route could not reach the local Ollama endpoint.",
+        error:
+          "The Lab 1 confidence route could not reach the local Ollama endpoint.",
       },
       { status: 502 },
     );
diff --git a/src/components/labs/Lab1ConfidenceChat.test.tsx b/src/components/labs/Lab1ConfidenceChat.test.tsx
index ff25962..cbfba4f 100644
--- a/src/components/labs/Lab1ConfidenceChat.test.tsx
+++ b/src/components/labs/Lab1ConfidenceChat.test.tsx
@@ -15,6 +15,9 @@ describe("Lab1ConfidenceChat", () => {
         return {
           json: async () => ({
             content: "often works",
+            finishReason: "stop",
+            isTruncated: false,
+            maxTokens: 512,
             model: "batiai/gemma4-e2b:q4",
             role: "assistant",
             tokens: [
@@ -86,4 +89,46 @@ describe("Lab1ConfidenceChat", () => {
       await screen.findByText("The local Ollama request failed."),
     ).toBeInTheDocument();
   });
+
+  it("explains when the response hit the configured token limit", async () => {
+    vi.stubGlobal(
+      "fetch",
+      vi.fn(async () => {
+        return {
+          json: async () => ({
+            content: "partial output",
+            finishReason: "length",
+            isTruncated: true,
+            maxTokens: 512,
+            model: "batiai/gemma4-e2b:q4",
+            role: "assistant",
+            tokens: [
+              {
+                logprob: Math.log(0.5),
+                probability: 50,
+                token: "partial",
+                topAlternatives: [],
+              },
+            ],
+          }),
+          ok: true,
+        };
+      }),
+    );
+
+    render(<Lab1ConfidenceChat />);
+
+    fireEvent.change(screen.getByLabelText("Prompt"), {
+      target: { value: "Write a longer answer." },
+    });
+    fireEvent.submit(
+      screen.getByRole("button", { name: "Generate Output" }).closest("form")!,
+    );
+
+    expect(
+      await screen.findByText(
+        /Response reached the configured 512-token limit/,
+      ),
+    ).toBeInTheDocument();
+  });
 });
diff --git a/src/components/labs/Lab1ConfidenceChat.tsx b/src/components/labs/Lab1ConfidenceChat.tsx
index 3b9cd19..7da0b47 100644
--- a/src/components/labs/Lab1ConfidenceChat.tsx
+++ b/src/components/labs/Lab1ConfidenceChat.tsx
@@ -304,6 +304,15 @@ export function Lab1ConfidenceChat() {
 
             })}
 
+          {message.isTruncated ? (
+            <div className="…">
+              Response reached the configured{" "}
+              {message.maxTokens ? `${message.maxTokens}-token` : "token"}{" "}
+              limit. Increase COURSEWARE_LAB1_MAX_TOKENS to
+              allow longer Lab 1 generations.
+            </div>
+          ) : null}
+
           {message.error ? (
             <div className="…">
               {message.error}
diff --git a/src/lib/lab1-confidence.test.ts b/src/lib/lab1-confidence.test.ts
index 1f1b551..05dc9a7 100644
--- a/src/lib/lab1-confidence.test.ts
+++ b/src/lib/lab1-confidence.test.ts
@@ -2,10 +2,12 @@ import { describe, expect, it } from "vitest";
 
 import {
   extractLab1AssistantContent,
+  extractLab1FinishReason,
   extractLab1ResponseTokens,
   formatProbabilityPercent,
   getConfidenceBand,
   logprobToProbabilityPercent,
+  parseLab1MaxTokens,
 } from "~/lib/lab1-confidence";
 
 describe("logprobToProbabilityPercent", () => {
@@ -30,6 +32,28 @@ describe("extractLab1AssistantContent", () => {
   });
 });
 
+describe("extractLab1FinishReason", () => {
+  it("reads the upstream finish reason when it is present", () => {
+    expect(
+      extractLab1FinishReason({
+        choices: [
+          {
+            finish_reason: "length",
+          },
+        ],
+      }),
+    ).toBe("length");
+  });
+});
+
+describe("parseLab1MaxTokens", () => {
+  it("uses a bounded positive environment override", () => {
+    expect(parseLab1MaxTokens("768")).toBe(768);
+    expect(parseLab1MaxTokens("999999")).toBe(2048);
+    expect(parseLab1MaxTokens("nope")).toBe(512);
+  });
+});
+
 describe("extractLab1ResponseTokens", () => {
   it("maps token logprobs and alternate candidates into display data", () => {
     expect(
diff --git a/src/lib/lab1-confidence.ts b/src/lib/lab1-confidence.ts
index db76480..4993a73 100644
--- a/src/lib/lab1-confidence.ts
+++ b/src/lib/lab1-confidence.ts
@@ -1,6 +1,7 @@
 export const LAB1_CONFIDENCE_MODEL_ALIAS = "batiai/gemma4-e2b:q4";
-export const LAB1_DEFAULT_MAX_TOKENS = 64;
+export const LAB1_DEFAULT_MAX_TOKENS = 512;
 export const LAB1_DEFAULT_TEMPERATURE = 0.7;
+export const LAB1_MAX_COMPLETION_TOKENS = 2048;
 export const LAB1_MAX_CONTEXT_MESSAGES = 10;
 export const LAB1_MAX_MESSAGE_LENGTH = 4000;
 
@@ -25,6 +26,9 @@ export type Lab1ResponseToken = {
 export type Lab1ConfidenceResponse = {
   content: string;
+  finishReason: string | null;
+  isTruncated: boolean;
+  maxTokens: number;
   model: string;
   role: "assistant";
   tokens: Lab1ResponseToken[];
 };
@@ -43,6 +47,7 @@ type OpenAiLogprobToken = {
 type OpenAiCompatibilityPayload = {
   choices?: Array<{
+    finish_reason?: string;
     logprobs?: {
       content?: OpenAiLogprobToken[];
     };
   }>;
@@ -61,6 +66,19 @@ export function getLab1SystemPrompt() {
   ].join(" ");
 }
 
+export function parseLab1MaxTokens(value: string | undefined) {
+  if (!value) {
+    return LAB1_DEFAULT_MAX_TOKENS;
+  }
+
+  const parsedValue = Number.parseInt(value, 10);
+  if (!Number.isFinite(parsedValue) || parsedValue <= 0) {
+    return LAB1_DEFAULT_MAX_TOKENS;
+  }
+
+  return Math.min(parsedValue, LAB1_MAX_COMPLETION_TOKENS);
+}
+
 export function clampLab1Messages(messages: Lab1ConfidenceMessage[]) {
   return messages
     .filter((message) => {
@@ -117,6 +135,13 @@ export function extractLab1AssistantContent(
   return content || null;
 }
 
+export function extractLab1FinishReason(payload: OpenAiCompatibilityPayload) {
+  const finishReason = payload.choices?.[0]?.finish_reason;
+  return typeof finishReason === "string" && finishReason.trim()
+    ? finishReason
+    : null;
+}
+
 export function extractLab1ResponseTokens(
   payload: OpenAiCompatibilityPayload,
 ): Lab1ResponseToken[] {
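
Reviewer note: a minimal sketch of how the new COURSEWARE_LAB1_MAX_TOKENS override is expected to resolve, mirroring the values exercised by the parseLab1MaxTokens unit tests above. The "~" import alias is the one the repo already uses; the snippet is illustrative and not part of the patch.

    import { parseLab1MaxTokens } from "~/lib/lab1-confidence";

    // Unset or non-numeric values fall back to LAB1_DEFAULT_MAX_TOKENS (512).
    console.log(parseLab1MaxTokens(undefined)); // 512
    console.log(parseLab1MaxTokens("nope")); // 512

    // Positive integers pass through until LAB1_MAX_COMPLETION_TOKENS (2048) caps them.
    console.log(parseLab1MaxTokens("768")); // 768
    console.log(parseLab1MaxTokens("999999")); // 2048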
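
Reviewer note: a minimal sketch of how a caller might surface the truncation fields the route now returns. The /api/lab1/chat URL is inferred from the route file's location under src/app, and the { messages: [...] } request body shape is an assumption, since ChatRouteRequestBody is not shown in this patch.

    import type { Lab1ConfidenceResponse } from "~/lib/lab1-confidence";

    // Hypothetical helper: ask the Lab 1 route a question and annotate truncated replies.
    async function askLab1(prompt: string): Promise<string> {
      const response = await fetch("/api/lab1/chat", {
        // Assumed request shape; adjust to the real ChatRouteRequestBody.
        body: JSON.stringify({ messages: [{ content: prompt, role: "user" }] }),
        headers: { "Content-Type": "application/json" },
        method: "POST",
      });
      const reply = (await response.json()) as Lab1ConfidenceResponse;

      // isTruncated is true when the upstream finish reason was "length".
      return reply.isTruncated
        ? `${reply.content} [stopped at the ${reply.maxTokens}-token limit]`
        : reply.content;
    }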