/*
 * File: LLM-Labs/src/lib/lab1-confidence.test.ts
 * TypeScript · 110 lines · 2.9 KiB
 */

import { describe, expect, it } from "vitest";
import {
extractLab1AssistantContent,
extractLab1FinishReason,
extractLab1ResponseTokens,
formatProbabilityPercent,
getConfidenceBand,
logprobToProbabilityPercent,
parseLab1MaxTokens,
} from "~/lib/lab1-confidence";
// Checks the logprob -> percentage conversion using ln(0.4), whose
// corresponding probability is known up front.
describe("logprobToProbabilityPercent", () => {
  it("converts a logprob into a rounded percentage", () => {
    const logprobOfFortyPercent = Math.log(0.4);
    const percent = logprobToProbabilityPercent(logprobOfFortyPercent);

    expect(percent).toBe(40);
  });
});
// Feeds a minimal chat-completion-shaped payload (choices[0].message.content)
// to the extractor and expects the message text back unchanged.
describe("extractLab1AssistantContent", () => {
  it("reads assistant content from an OpenAI-compatible response", () => {
    const assistantText = "hello from the local model";

    const extracted = extractLab1AssistantContent({
      choices: [{ message: { content: assistantText } }],
    });

    expect(extracted).toBe(assistantText);
  });
});
// Supplies a payload whose first choice carries finish_reason "length" and
// expects that exact value to be returned.
describe("extractLab1FinishReason", () => {
  it("reads the upstream finish reason when it is present", () => {
    const finishReason = extractLab1FinishReason({
      choices: [{ finish_reason: "length" }],
    });

    expect(finishReason).toBe("length");
  });
});
// Table of raw override strings and the token limit each must produce:
// "768" comes back as-is, "999999" is expected as 2048, and the non-numeric
// "nope" is expected as 512.
describe("parseLab1MaxTokens", () => {
  it("uses a bounded positive environment override", () => {
    const cases: Array<[string, number]> = [
      ["768", 768],
      ["999999", 2048],
      ["nope", 512],
    ];

    // Checked in order; the first mismatch fails the test.
    for (const [rawValue, expectedLimit] of cases) {
      expect(parseLab1MaxTokens(rawValue)).toBe(expectedLimit);
    }
  });
});
// Passes one sampled token ("often") with three top_logprobs candidates and
// checks the display shape produced for it.
describe("extractLab1ResponseTokens", () => {
  it("maps token logprobs and alternate candidates into display data", () => {
    const oftenLogprob = Math.log(0.4);
    const commonlyLogprob = Math.log(0.14);
    const alsoLogprob = Math.log(0.1);

    const displayTokens = extractLab1ResponseTokens({
      choices: [
        {
          logprobs: {
            content: [
              {
                logprob: oftenLogprob,
                token: "often",
                // The sampled token is repeated among its own candidates.
                top_logprobs: [
                  { logprob: oftenLogprob, token: "often" },
                  { logprob: commonlyLogprob, token: "commonly" },
                  { logprob: alsoLogprob, token: "also" },
                ],
              },
            ],
          },
        },
      ],
    });

    // The expected alternatives list only the two candidates other than the
    // sampled token, each as a percentage.
    const expectedTokens = [
      {
        logprob: oftenLogprob,
        probability: 40,
        token: "often",
        topAlternatives: [
          { probability: 14, token: "commonly" },
          { probability: 10, token: "also" },
        ],
      },
    ];

    expect(displayTokens).toEqual(expectedTokens);
  });
});
// One sample probability per band, paired with the band label expected for it.
describe("getConfidenceBand", () => {
  it("assigns a stable band for each probability range", () => {
    const samples: Array<[number, string]> = [
      [75, "very-high"],
      [45, "high"],
      [20, "medium"],
      [7, "low"],
      [1, "very-low"],
    ];

    // Checked from highest to lowest probability; the first mismatch fails.
    for (const [probability, band] of samples) {
      expect(getConfidenceBand(probability)).toBe(band);
    }
  });
});
// Sample values and the exact tooltip text expected for each; the expected
// strings show a different number of decimals for 40, 4.2 and 0.456.
describe("formatProbabilityPercent", () => {
  it("formats probability values for tooltip display", () => {
    const samples = [
      { value: 40, label: "40.0%" },
      { value: 4.2, label: "4.20%" },
      { value: 0.456, label: "0.456%" },
    ];

    samples.forEach(({ value, label }) => {
      expect(formatProbabilityPercent(value)).toBe(label);
    });
  });
});