From a7c1bda07cfda0b6a2068dea1b88beb88de462ba Mon Sep 17 00:00:00 2001
From: c4ch3c4d3 <23181631+c4ch3c4d3@users.noreply.github.com>
Date: Mon, 27 Apr 2026 10:58:13 -0600
Subject: [PATCH] Add configurable token limit and truncation warning to Lab 1
confidence chat
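
Lab 1's confidence chat previously hard-coded a 64-token completion
cap, which silently cut off longer answers. The cap now defaults to
512 tokens and can be raised per deployment through the
COURSEWARE_LAB1_MAX_TOKENS environment variable. Unparseable or
non-positive values fall back to the 512-token default, and anything
above 2048 is clamped to LAB1_MAX_COMPLETION_TOKENS. For example
(assuming the usual Next.js .env.local workflow; the exact env file
is deployment-specific):

    COURSEWARE_LAB1_MAX_TOKENS=1024

The API route also reads the upstream finish_reason and returns
finishReason, isTruncated, and maxTokens alongside the tokens, so the
chat UI can warn the learner when a response hits the configured
limit.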
---
src/app/api/lab1/chat/route.ts | 22 ++++++---
.../labs/Lab1ConfidenceChat.test.tsx | 45 +++++++++++++++++++
src/components/labs/Lab1ConfidenceChat.tsx | 9 ++++
src/lib/lab1-confidence.test.ts | 24 ++++++++++
src/lib/lab1-confidence.ts | 27 ++++++++++-
5 files changed, 121 insertions(+), 6 deletions(-)
diff --git a/src/app/api/lab1/chat/route.ts b/src/app/api/lab1/chat/route.ts
index 5bafdf1..14a5638 100644
--- a/src/app/api/lab1/chat/route.ts
+++ b/src/app/api/lab1/chat/route.ts
@@ -4,11 +4,12 @@ import { normalizeUpstreamChatEndpoint } from "~/lib/lab2-chat";
import {
clampLab1Messages,
extractLab1AssistantContent,
+ extractLab1FinishReason,
extractLab1ResponseTokens,
getLab1SystemPrompt,
LAB1_CONFIDENCE_MODEL_ALIAS,
- LAB1_DEFAULT_MAX_TOKENS,
LAB1_DEFAULT_TEMPERATURE,
+ parseLab1MaxTokens,
type Lab1ConfidenceMessage,
} from "~/lib/lab1-confidence";
@@ -32,6 +33,10 @@ function getLab1ModelAlias() {
);
}
+function getLab1MaxTokens() {
+ return parseLab1MaxTokens(process.env.COURSEWARE_LAB1_MAX_TOKENS?.trim());
+}
+
export async function POST(request: Request) {
let body: ChatRouteRequestBody;
@@ -62,10 +67,11 @@ export async function POST(request: Request) {
);
try {
+ const maxTokens = getLab1MaxTokens();
const upstreamResponse = await fetch(getLocalOllamaEndpoint(), {
body: JSON.stringify({
logprobs: true,
- max_tokens: LAB1_DEFAULT_MAX_TOKENS,
+ max_tokens: maxTokens,
messages: [
{
content: getLab1SystemPrompt(),
@@ -131,13 +137,18 @@ export async function POST(request: Request) {
const content =
extractLab1AssistantContent(parsedBody) ||
tokens.map((token) => token.token).join("");
+ const finishReason = extractLab1FinishReason(parsedBody);
+ const isTruncated = finishReason === "length";
return NextResponse.json({
content,
+ finishReason,
+ isTruncated,
+ maxTokens,
model:
- ("model" in parsedBody && typeof parsedBody.model === "string"
+ "model" in parsedBody && typeof parsedBody.model === "string"
? parsedBody.model
- : getLab1ModelAlias()),
+ : getLab1ModelAlias(),
role: "assistant",
tokens,
});
@@ -153,7 +164,8 @@ export async function POST(request: Request) {
return NextResponse.json(
{
- error: "The Lab 1 confidence route could not reach the local Ollama endpoint.",
+ error:
+ "The Lab 1 confidence route could not reach the local Ollama endpoint.",
},
{ status: 502 },
);
diff --git a/src/components/labs/Lab1ConfidenceChat.test.tsx b/src/components/labs/Lab1ConfidenceChat.test.tsx
index ff25962..cbfba4f 100644
--- a/src/components/labs/Lab1ConfidenceChat.test.tsx
+++ b/src/components/labs/Lab1ConfidenceChat.test.tsx
@@ -15,6 +15,9 @@ describe("Lab1ConfidenceChat", () => {
return {
json: async () => ({
content: "often works",
+ finishReason: "stop",
+ isTruncated: false,
+ maxTokens: 512,
model: "batiai/gemma4-e2b:q4",
role: "assistant",
tokens: [
@@ -86,4 +89,46 @@ describe("Lab1ConfidenceChat", () => {
await screen.findByText("The local Ollama request failed."),
).toBeInTheDocument();
});
+
+ it("explains when the response hit the configured token limit", async () => {
+ vi.stubGlobal(
+ "fetch",
+ vi.fn(async () => {
+ return {
+ json: async () => ({
+ content: "partial output",
+ finishReason: "length",
+ isTruncated: true,
+ maxTokens: 512,
+ model: "batiai/gemma4-e2b:q4",
+ role: "assistant",
+ tokens: [
+ {
+ logprob: Math.log(0.5),
+ probability: 50,
+ token: "partial",
+ topAlternatives: [],
+ },
+ ],
+ }),
+ ok: true,
+ };
+ }),
+ );
+
+    render(<Lab1ConfidenceChat />);
+    // Send a prompt so the stubbed fetch resolves with the truncated payload.
+    fireEvent.change(screen.getByRole("textbox"), {
+      target: { value: "Does this overflow the limit?" },
+    });
+    fireEvent.click(
+      screen.getByRole("button", { name: /send/i }),
+    );
+
+    expect(
+      await screen.findByText(
+        /reached the configured 512-token limit/i,
+      ),
+    ).toBeInTheDocument();
+  });
 });
diff --git a/src/components/labs/Lab1ConfidenceChat.tsx b/src/components/labs/Lab1ConfidenceChat.tsx
--- a/src/components/labs/Lab1ConfidenceChat.tsx
+++ b/src/components/labs/Lab1ConfidenceChat.tsx
@@ ... @@
+          {message.isTruncated ? (
+            <p>
+              Response reached the configured{" "}
+              {message.maxTokens ? `${message.maxTokens}-token` : "token"}{" "}
+              limit. Increase COURSEWARE_LAB1_MAX_TOKENS to
+              allow longer Lab 1 generations.
+            </p>
+          ) : null}
+
           {message.error}
diff --git a/src/lib/lab1-confidence.test.ts b/src/lib/lab1-confidence.test.ts
index 1f1b551..05dc9a7 100644
--- a/src/lib/lab1-confidence.test.ts
+++ b/src/lib/lab1-confidence.test.ts
@@ -2,10 +2,12 @@ import { describe, expect, it } from "vitest";
 
 import {
   extractLab1AssistantContent,
+  extractLab1FinishReason,
   extractLab1ResponseTokens,
   formatProbabilityPercent,
   getConfidenceBand,
   logprobToProbabilityPercent,
+  parseLab1MaxTokens,
 } from "~/lib/lab1-confidence";
 
 describe("logprobToProbabilityPercent", () => {
@@ -30,6 +32,28 @@ describe("extractLab1AssistantContent", () => {
   });
 });
 
+describe("extractLab1FinishReason", () => {
+  it("reads the upstream finish reason when it is present", () => {
+    expect(
+      extractLab1FinishReason({
+        choices: [
+          {
+            finish_reason: "length",
+          },
+        ],
+      }),
+    ).toBe("length");
+  });
+});
+
+describe("parseLab1MaxTokens", () => {
+  it("uses a bounded positive environment override", () => {
+    expect(parseLab1MaxTokens("768")).toBe(768);
+    expect(parseLab1MaxTokens("999999")).toBe(2048);
+    expect(parseLab1MaxTokens("nope")).toBe(512);
+  });
+});
+
 describe("extractLab1ResponseTokens", () => {
   it("maps token logprobs and alternate candidates into display data", () => {
     expect(
diff --git a/src/lib/lab1-confidence.ts b/src/lib/lab1-confidence.ts
index db76480..4993a73 100644
--- a/src/lib/lab1-confidence.ts
+++ b/src/lib/lab1-confidence.ts
@@ -1,6 +1,7 @@
 export const LAB1_CONFIDENCE_MODEL_ALIAS = "batiai/gemma4-e2b:q4";
-export const LAB1_DEFAULT_MAX_TOKENS = 64;
+export const LAB1_DEFAULT_MAX_TOKENS = 512;
 export const LAB1_DEFAULT_TEMPERATURE = 0.7;
+export const LAB1_MAX_COMPLETION_TOKENS = 2048;
 export const LAB1_MAX_CONTEXT_MESSAGES = 10;
 export const LAB1_MAX_MESSAGE_LENGTH = 4000;
 
@@ -25,6 +26,9 @@ export type Lab1ResponseToken = {
 
 export type Lab1ConfidenceResponse = {
   content: string;
+  finishReason: string | null;
+  isTruncated: boolean;
+  maxTokens: number;
   model: string;
   role: "assistant";
   tokens: Lab1ResponseToken[];
@@ -43,6 +47,7 @@ type OpenAiLogprobToken = {
 
 type OpenAiCompatibilityPayload = {
   choices?: Array<{
+    finish_reason?: string;
     logprobs?: {
       content?: OpenAiLogprobToken[];
     };
@@ -61,6 +66,19 @@ export function getLab1SystemPrompt() {
   ].join(" ");
 }
 
+export function parseLab1MaxTokens(value: string | undefined) {
+  if (!value) {
+    return LAB1_DEFAULT_MAX_TOKENS;
+  }
+
+  const parsedValue = Number.parseInt(value, 10);
+  if (!Number.isFinite(parsedValue) || parsedValue <= 0) {
+    return LAB1_DEFAULT_MAX_TOKENS;
+  }
+
+  return Math.min(parsedValue, LAB1_MAX_COMPLETION_TOKENS);
+}
+
 export function clampLab1Messages(messages: Lab1ConfidenceMessage[]) {
   return messages
     .filter((message) => {
@@ -117,6 +135,13 @@ export function extractLab1AssistantContent(
   return content || null;
 }
 
+export function extractLab1FinishReason(payload: OpenAiCompatibilityPayload) {
+  const finishReason = payload.choices?.[0]?.finish_reason;
+  return typeof finishReason === "string" && finishReason.trim()
+    ? finishReason
+    : null;
+}
+
 export function extractLab1ResponseTokens(
   payload: OpenAiCompatibilityPayload,
 ): Lab1ResponseToken[] {