diff --git a/content/labs/lab-4-oi-prompting.md b/content/labs/lab-4-oi-prompting.md
index 0023b1c..d0a7852 100644
--- a/content/labs/lab-4-oi-prompting.md
+++ b/content/labs/lab-4-oi-prompting.md
@@ -14,6 +14,7 @@ In this lab, we will:
- Run Open WebUI
- Using an Ollama Model within Open WebUI
+- Visualizing Inference Parameters
- Experimenting with Inference Parameters
- Experimenting with Prompting Techniques
@@ -123,7 +124,19 @@ Locate, pull, and run **Qwen3.5 4B** using the **Open WebUI**. By default, Ope
---
-## Objective 3: Inference Settings
+## Objective 3: Inference Settings Visualization
+
+### Explore: Token Sampling Controls
+
+Before changing model settings in Open WebUI, use these three toy samplers to see what the controls do to the next-token distribution. Each widget starts from the same prompt, `The quick brown fox`, and predicts candidate continuations toward the familiar phrase `jumps over the lazy dog`.
+
+Temperature rescales the whole distribution: values below 1 sharpen it toward the most likely tokens, while values above 1 flatten it. Top K removes every candidate outside the K most likely tokens. Top P keeps the smallest group of top-ranked candidates whose cumulative probability reaches P, while Min P keeps only the candidates whose probability is at least a set fraction of the strongest candidate's.
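+
+For a concrete feel of one rule before you open the widgets, here is a minimal TypeScript sketch (illustrative only, not the widgets' actual code) of Top P applied to the starting distribution, where `jumps` has probability 0.34 and `leaps` 0.16:
+
+```ts
+// Candidate probabilities for "The quick brown fox", sorted high to low.
+const probs = [0.34, 0.16, 0.12, 0.1, 0.08, 0.06, 0.05, 0.04, 0.03, 0.02];
+
+// Top P = 0.5: keep taking the most likely tokens until their cumulative
+// probability reaches the target. 0.34 + 0.16 = 0.50, so two tokens survive
+// and the rest are dropped before sampling.
+let mass = 0;
+const kept = probs.filter((p) => {
+  if (mass >= 0.5) return false; // target reached; exclude the rest
+  mass += p;
+  return true;
+});
+console.log(kept); // [0.34, 0.16]
+```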
+
+<InferenceSettingsVisualization />
+
+---
+
+## Objective 4: Inference Settings
### Explore: OUI Inference Parameter Valves
@@ -205,7 +218,7 @@ Feel free to continue to explore with other topics or images. Note how each time
---
-## Objective 4: Prompting Techniques
+## Objective 5: Prompting Techniques
### Explore: Prompt Engineering & System Prompting
@@ -342,12 +355,14 @@ Throughout this lab, we've explored the fascinating world of Open WebUI and prom
- Top K: Limits token selection to top K most likely options
- Top P: Uses nucleus sampling based on cumulative probability
-3. **Prompting Techniques**: We examined various prompting strategies:
+3. **Inference Settings Visualization**: We used local toy samplers to watch Temperature, Top K, Top P, and Min P reshape the next-token distribution.
+
+4. **Prompting Techniques**: We examined various prompting strategies:
- Few Shot Prompting: Providing examples of desired outputs
- Meta Prompting: Giving guidance to reach outcomes
- Chain of Thought: Encouraging step-by-step reasoning
- Self Criticism: Having the model evaluate its own responses
-4. **System Prompting**: We created custom models with specific system prompts and parameter settings, learning how to tailor LLM behavior for specialized tasks.
+5. **System Prompting**: We created custom models with specific system prompts and parameter settings, learning how to tailor LLM behavior for specialized tasks.
These concepts are foundational for effectively working with large language models in real-world applications. Remember that prompt engineering is both an art and a science - it requires understanding both the capabilities of the model and the nuances of human language. As you continue your journey with LLMs, don't hesitate to experiment with different approaches and parameters to find what works best for your specific use cases.
diff --git a/src/components/labs/InferenceSettingsVisualization.test.tsx b/src/components/labs/InferenceSettingsVisualization.test.tsx
new file mode 100644
index 0000000..bc29674
--- /dev/null
+++ b/src/components/labs/InferenceSettingsVisualization.test.tsx
@@ -0,0 +1,129 @@
+import { fireEvent, render, screen, within } from "@testing-library/react";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+import { InferenceSettingsVisualization } from "~/components/labs/InferenceSettingsVisualization";
+
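+// Returns the sampler card, located as the <article> enclosing the given heading.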
+function getCard(name: string) {
+ const card = screen.getByRole("heading", { name }).closest("article");
+ expect(card).not.toBeNull();
+ return card as HTMLElement;
+}
+
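+// Finds the candidate row element whose text includes the given token.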
+function getCandidateRow(card: HTMLElement, token: string) {
+ const row = Array.from(
+ card.querySelectorAll(".inference-settings-viz__row"),
+ ).find((candidateRow) => candidateRow.textContent?.includes(token));
+ expect(row).toBeDefined();
+ return row as HTMLElement;
+}
+
+describe("InferenceSettingsVisualization", () => {
+ afterEach(() => {
+ vi.restoreAllMocks();
+ });
+
+ it("renders three separate samplers with the shared fox prompt", () => {
+ render(<InferenceSettingsVisualization />);
+
+ expect(
+ screen.getByRole("heading", {
+ name: "See inference filters reshape the next-token choice",
+ }),
+ ).toBeInTheDocument();
+ expect(
+ screen.getByRole("heading", { name: "Temperature" }),
+ ).toBeInTheDocument();
+ expect(screen.getByRole("heading", { name: "Top K" })).toBeInTheDocument();
+ expect(
+ screen.getByRole("heading", { name: "Top P / Min P" }),
+ ).toBeInTheDocument();
+ expect(
+ screen.getAllByText("The quick brown fox").length,
+ ).toBeGreaterThanOrEqual(3);
+ expect(getCard("Top P / Min P")).toHaveClass(
+ "inference-settings-viz__card--wide",
+ );
+ });
+
+ it("updates the temperature distribution when the slider changes", () => {
+ render(<InferenceSettingsVisualization />);
+
+ const card = getCard("Temperature");
+ const jumpsRow = getCandidateRow(card, "jumps");
+ const initialText = jumpsRow.textContent;
+
+ fireEvent.change(within(card).getByLabelText("Temperature"), {
+ target: { value: "2" },
+ });
+
+ expect(jumpsRow.textContent).not.toBe(initialText);
+ });
+
+ it("excludes lower-ranked candidates from Top K sampling", () => {
+ vi.spyOn(Math, "random").mockReturnValue(0.99);
+ render(<InferenceSettingsVisualization />);
+
+ const card = getCard("Top K");
+ fireEvent.change(within(card).getByLabelText("Top K"), {
+ target: { value: "1" },
+ });
+
+ expect(getCandidateRow(card, "jumps")).toHaveTextContent("Included");
+ expect(getCandidateRow(card, "leaps")).toHaveTextContent("Excluded");
+
+ fireEvent.click(
+ within(card).getByRole("button", { name: "Sample Next Token" }),
+ );
+ expect(
+ within(card).getByText("The quick brown fox jumps"),
+ ).toBeInTheDocument();
+ });
+
+ it("toggles Top P into Min P mode and applies the relative probability floor", () => {
+ render(<InferenceSettingsVisualization />);
+
+ const card = getCard("Top P / Min P");
+
+ expect(within(card).getByText("Top P threshold math")).toBeInTheDocument();
+ expect(within(card).getByText("Target P")).toBeInTheDocument();
+ expect(
+ within(card).getByLabelText("Top P cumulative probability strip"),
+ ).toBeInTheDocument();
+
+ fireEvent.click(within(card).getByRole("button", { name: "Min P" }));
+
+ const minPSlider = within(card).getByLabelText("Min P");
+ expect(minPSlider).toBeInTheDocument();
+ expect(within(card).getByText("Min P threshold math")).toBeInTheDocument();
+ expect(
+ within(card).getByLabelText("Min P raw probability cutoff bars"),
+ ).toBeInTheDocument();
+
+ fireEvent.change(minPSlider, {
+ target: { value: "0.2" },
+ });
+
+ expect(getCandidateRow(card, "hops")).toHaveTextContent("Included");
+ expect(getCandidateRow(card, "darts")).toHaveTextContent("Excluded");
+ });
+
+ it("samples and resets a card sequence", () => {
+ vi.spyOn(Math, "random").mockReturnValue(0);
+ render(<InferenceSettingsVisualization />);
+
+ const card = getCard("Temperature");
+ fireEvent.click(
+ within(card).getByRole("button", { name: "Sample Next Token" }),
+ );
+
+ expect(
+ within(card).getByText("The quick brown fox jumps"),
+ ).toBeInTheDocument();
+ expect(within(card).getByText(/Sampled "jumps"/)).toBeInTheDocument();
+
+ fireEvent.click(within(card).getByRole("button", { name: "Reset" }));
+
+ expect(within(card).getByText("The quick brown fox")).toBeInTheDocument();
+ expect(within(card).getByText("No token sampled yet")).toBeInTheDocument();
+ });
+});
diff --git a/src/components/labs/InferenceSettingsVisualization.tsx b/src/components/labs/InferenceSettingsVisualization.tsx
new file mode 100644
index 0000000..c60c28c
--- /dev/null
+++ b/src/components/labs/InferenceSettingsVisualization.tsx
@@ -0,0 +1,642 @@
+"use client";
+
+import { useMemo, useState } from "react";
+
+type Candidate = {
+ token: string;
+ raw: number;
+};
+
+type ProcessedCandidate = Candidate & {
+ included: boolean;
+ samplingProb: number;
+};
+
+type CumulativeCandidate = Candidate & {
+ cumulativeEnd: number;
+ cumulativeStart: number;
+ included: boolean;
+};
+
+type SamplerKind = "temperature" | "top-k" | "top-p";
+type NucleusMode = "top-p" | "min-p";
+
+const INITIAL_PROMPT = "The quick brown fox";
+const BAR_COLORS = [
+ "#0b72ba",
+ "#0f766e",
+ "#b77400",
+ "#7c3aed",
+ "#be123c",
+ "#4f46e5",
+ "#15803d",
+ "#a16207",
+ "#0e7490",
+ "#9333ea",
+] as const;
+
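+// Hand-authored next-token distributions, keyed by the sequence generated so far.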
+const CANDIDATE_SETS: Record<string, Candidate[]> = {
+ [INITIAL_PROMPT]: [
+ { token: " jumps", raw: 0.34 },
+ { token: " leaps", raw: 0.16 },
+ { token: " runs", raw: 0.12 },
+ { token: " bounds", raw: 0.1 },
+ { token: " hops", raw: 0.08 },
+ { token: " darts", raw: 0.06 },
+ { token: " sneaks", raw: 0.05 },
+ { token: " watches", raw: 0.04 },
+ { token: " sleeps", raw: 0.03 },
+ { token: " ignores", raw: 0.02 },
+ ],
+ [`${INITIAL_PROMPT} jumps`]: [
+ { token: " over", raw: 0.48 },
+ { token: " across", raw: 0.16 },
+ { token: " past", raw: 0.12 },
+ { token: " toward", raw: 0.08 },
+ { token: " beside", raw: 0.06 },
+ { token: " near", raw: 0.04 },
+ { token: " under", raw: 0.03 },
+ { token: " through", raw: 0.03 },
+ ],
+ [`${INITIAL_PROMPT} jumps over`]: [
+ { token: " the", raw: 0.64 },
+ { token: " a", raw: 0.14 },
+ { token: " one", raw: 0.06 },
+ { token: " every", raw: 0.05 },
+ { token: " that", raw: 0.04 },
+ { token: " another", raw: 0.03 },
+ { token: " this", raw: 0.02 },
+ { token: " each", raw: 0.02 },
+ ],
+ [`${INITIAL_PROMPT} jumps over the`]: [
+ { token: " lazy", raw: 0.46 },
+ { token: " sleepy", raw: 0.14 },
+ { token: " old", raw: 0.1 },
+ { token: " tired", raw: 0.09 },
+ { token: " quiet", raw: 0.07 },
+ { token: " brown", raw: 0.05 },
+ { token: " startled", raw: 0.05 },
+ { token: " patient", raw: 0.04 },
+ ],
+ [`${INITIAL_PROMPT} jumps over the lazy`]: [
+ { token: " dog", raw: 0.68 },
+ { token: " hound", raw: 0.1 },
+ { token: " pup", raw: 0.07 },
+ { token: " cat", raw: 0.05 },
+ { token: " animal", raw: 0.04 },
+ { token: " spaniel", raw: 0.03 },
+ { token: " retriever", raw: 0.02 },
+ { token: " watchdog", raw: 0.01 },
+ ],
+};
+
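+// Served by getCandidates when the sequence leaves the scripted pangram path.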
+const FALLBACK_CANDIDATES: Candidate[] = [
+ { token: ".", raw: 0.28 },
+ { token: " and", raw: 0.18 },
+ { token: " while", raw: 0.12 },
+ { token: " before", raw: 0.1 },
+ { token: " near", raw: 0.09 },
+ { token: " again", raw: 0.08 },
+ { token: ",", raw: 0.08 },
+ { token: " quickly", raw: 0.07 },
+];
+
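+// Rescales raw weights so the candidate probabilities sum to 1.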
+function normalize(candidates: Candidate[]): Candidate[] {
+ const sum = candidates.reduce((total, candidate) => total + candidate.raw, 0);
+ if (sum <= 0) return candidates;
+ return candidates.map((candidate) => ({
+ ...candidate,
+ raw: candidate.raw / sum,
+ }));
+}
+
+function getCandidates(sequence: string) {
+ return normalize(CANDIDATE_SETS[sequence] ?? FALLBACK_CANDIDATES);
+}
+
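+// Drops the working score field, zeroes out excluded candidates, and rescales
+// the included ones so their sampling probabilities sum to 1.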
+function renormalizeIncluded(
+ candidates: Array<Candidate & { included: boolean; score: number }>,
+): ProcessedCandidate[] {
+ const includedSum = candidates.reduce((total, candidate) => {
+ return candidate.included ? total + candidate.score : total;
+ }, 0);
+
+ return candidates.map(({ score, ...candidate }) => ({
+ ...candidate,
+ samplingProb:
+ candidate.included && includedSum > 0 ? score / includedSum : 0,
+ }));
+}
+
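+// Recovers logits from the probabilities, scales them by 1 / temperature, and
+// re-applies a max-subtracted (numerically stable) softmax. Low temperature
+// sharpens the distribution; high temperature flattens it.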
+function applyTemperature(
+ candidates: Candidate[],
+ temperature: number,
+): ProcessedCandidate[] {
+ const logits = candidates.map(
+ (candidate) => Math.log(candidate.raw) / temperature,
+ );
+ const maxLogit = Math.max(...logits);
+ const exps = logits.map((logit) => Math.exp(logit - maxLogit));
+ const sum = exps.reduce((total, value) => total + value, 0);
+
+ return candidates.map((candidate, index) => ({
+ ...candidate,
+ included: true,
+ samplingProb: exps[index] ? exps[index] / sum : 0,
+ }));
+}
+
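+// Keeps only the topK highest-probability candidates and renormalizes over them.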
+function applyTopK(
+ candidates: Candidate[],
+ topK: number,
+): ProcessedCandidate[] {
+ const includedTokens = new Set(
+ [...candidates]
+ .sort((left, right) => right.raw - left.raw)
+ .slice(0, topK)
+ .map((candidate) => candidate.token),
+ );
+
+ return renormalizeIncluded(
+ candidates.map((candidate) => ({
+ ...candidate,
+ included: includedTokens.has(candidate.token),
+ score: candidate.raw,
+ })),
+ );
+}
+
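+// Nucleus sampling: add candidates in descending probability order until the
+// cumulative mass reaches topP. The candidate that crosses the threshold is
+// still included, so the kept mass can overshoot the target.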
+function applyTopP(
+ candidates: Candidate[],
+ topP: number,
+): ProcessedCandidate[] {
+ const sortedCandidates = [...candidates].sort(
+ (left, right) => right.raw - left.raw,
+ );
+ const includedTokens = new Set<string>();
+ let cumulativeProbability = 0;
+
+ for (const candidate of sortedCandidates) {
+ includedTokens.add(candidate.token);
+ cumulativeProbability += candidate.raw;
+ if (cumulativeProbability >= topP) break;
+ }
+
+ return renormalizeIncluded(
+ candidates.map((candidate) => ({
+ ...candidate,
+ included: includedTokens.has(candidate.token),
+ score: candidate.raw,
+ })),
+ );
+}
+
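+// Keeps candidates whose probability is at least minP times the single most
+// likely candidate's probability, then renormalizes the survivors.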
+function applyMinP(
+ candidates: Candidate[],
+ minP: number,
+): ProcessedCandidate[] {
+ const highestProbability = Math.max(
+ ...candidates.map((candidate) => candidate.raw),
+ );
+ const threshold = highestProbability * minP;
+
+ return renormalizeIncluded(
+ candidates.map((candidate) => ({
+ ...candidate,
+ included: candidate.raw >= threshold,
+ score: candidate.raw,
+ })),
+ );
+}
+
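+// Inverse-CDF draw: a single uniform random number walks the included
+// candidates' probabilities until its remaining mass is exhausted.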
+function sampleCandidate(candidates: ProcessedCandidate[]) {
+ const includedCandidates = candidates.filter(
+ (candidate) => candidate.included,
+ );
+ if (includedCandidates.length === 0) return candidates[0] ?? null;
+
+ let cursor = Math.random();
+ for (const candidate of includedCandidates) {
+ cursor -= candidate.samplingProb;
+ if (cursor <= 0) return candidate;
+ }
+
+ return includedCandidates[includedCandidates.length - 1] ?? null;
+}
+
+function formatPercent(value: number) {
+ return `${(value * 100).toFixed(1)}%`;
+}
+
+function getSortedCandidates(candidates: Candidate[]) {
+ return [...candidates].sort((left, right) => right.raw - left.raw);
+}
+
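+// Annotates candidates (in descending probability order) with the cumulative
+// span each occupies, used by the Top P cumulative strip visual.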
+function getCumulativeCandidates(
+ candidates: Candidate[],
+ processedCandidates: ProcessedCandidate[],
+): CumulativeCandidate[] {
+ const includedTokens = new Set(
+ processedCandidates
+ .filter((candidate) => candidate.included)
+ .map((candidate) => candidate.token),
+ );
+ let cumulativeProbability = 0;
+
+ return getSortedCandidates(candidates).map((candidate) => {
+ const cumulativeStart = cumulativeProbability;
+ cumulativeProbability += candidate.raw;
+ return {
+ ...candidate,
+ cumulativeEnd: cumulativeProbability,
+ cumulativeStart,
+ included: includedTokens.has(candidate.token),
+ };
+ });
+}
+
+function getIncludedRawSum(processedCandidates: ProcessedCandidate[]) {
+ return processedCandidates.reduce((total, candidate) => {
+ return candidate.included ? total + candidate.raw : total;
+ }, 0);
+}
+
+function NucleusThresholdVisual({
+ candidates,
+ minP,
+ mode,
+ processedCandidates,
+ topP,
+}: {
+ candidates: Candidate[];
+ minP: number;
+ mode: NucleusMode;
+ processedCandidates: ProcessedCandidate[];
+ topP: number;
+}) {
+ if (mode === "top-p") {
+ const cumulativeCandidates = getCumulativeCandidates(
+ candidates,
+ processedCandidates,
+ );
+ const includedSum = getIncludedRawSum(processedCandidates);
+ const includedTokens = cumulativeCandidates.filter(
+ (candidate) => candidate.included,
+ );
+
+ return (
+  /* Element and class names in this markup are best-effort reconstructions;
+     the text content and expressions are original. */
+  <div className="inference-settings-viz__threshold">
+   <h4>Top P threshold math</h4>
+   <p>
+    Keep adding highest-probability tokens until cumulative probability
+    reaches {topP.toFixed(2)}.
+   </p>
+   <dl>
+    <dt>Target P</dt>
+    <dd>{formatPercent(topP)}</dd>
+    <dt>Included mass</dt>
+    <dd>{formatPercent(includedSum)}</dd>
+   </dl>
+   <p>
+    Included prefix:{" "}
+    <strong>
+     {includedTokens
+      .map((candidate) => candidate.token.trim())
+      .join(" + ")}
+    </strong>
+    . The last included token can push the total past the target because
+    tokens are discrete choices.
+   </p>