diff --git a/content/labs/lab-4-oi-prompting.md b/content/labs/lab-4-oi-prompting.md index 0023b1c..d0a7852 100644 --- a/content/labs/lab-4-oi-prompting.md +++ b/content/labs/lab-4-oi-prompting.md @@ -14,6 +14,7 @@ In this lab, we will: - Run Open WebUI - Using an Ollama Model within Open WebUI +- Visualizing Inference Parameters - Experimenting with Inference Parameters - Experimenting with Prompting Techniques @@ -123,7 +124,19 @@ Locate, pull, and run **Qwen3.5 4B** using the **Open WebUI**. By defualt, Ope --- -## Objective 3: Inference Settings +## Objective 3: Inference Settings Visualization + +### Explore: Token Sampling Controls + +Before changing model settings in Open WebUI, use these three toy samplers to see what the controls do to the next-token distribution. Each widget starts from the same prompt, `The quick brown fox`, and predicts candidate continuations toward the familiar phrase `jumps over the lazy dog`. + +Temperature reshapes the whole distribution. Top K removes every candidate outside the K most likely tokens. Top P keeps the smallest group of candidates whose cumulative probability reaches P, while Min P keeps candidates above a probability floor relative to the strongest candidate. + +
+ +--- + +## Objective 4: Inference Settings ### Explore: OUI Inference Parameter Valves @@ -205,7 +218,7 @@ Feel free to continue to explore with other topics or images. Note how each time --- -## Objective 4: Prompting Techniques +## Objective 5: Prompting Techniques ### Explore: Prompt Engineering & System Prompting @@ -342,12 +355,14 @@ Throughout this lab, we've explored the fascinating world of Open WebUI and prom - Top K: Limits token selection to top K most likely options - Top P: Uses nucleus sampling based on cumulative probability -3. **Prompting Techniques**: We examined various prompting strategies: +3. **Inference Settings Visualization**: We used a local sampler to see how Temperature, Top K, Top P, and Min P reshape candidate token selection. + +4. **Prompting Techniques**: We examined various prompting strategies: - Few Shot Prompting: Providing examples of desired outputs - Meta Prompting: Giving guidance to reach outcomes - Chain of Thought: Encouraging step-by-step reasoning - Self Criticism: Having the model evaluate its own responses -4. **System Prompting**: We created custom models with specific system prompts and parameter settings, learning how to tailor LLM behavior for specialized tasks. +5. **System Prompting**: We created custom models with specific system prompts and parameter settings, learning how to tailor LLM behavior for specialized tasks. These concepts are foundational for effectively working with large language models in real-world applications. Remember that prompt engineering is both an art and a science - it requires understanding both the capabilities of the model and the nuances of human language. As you continue your journey with LLMs, don't hesitate to experiment with different approaches and parameters to find what works best for your specific use cases. diff --git a/src/components/labs/InferenceSettingsVisualization.test.tsx b/src/components/labs/InferenceSettingsVisualization.test.tsx new file mode 100644 index 0000000..bc29674 --- /dev/null +++ b/src/components/labs/InferenceSettingsVisualization.test.tsx @@ -0,0 +1,129 @@ +import { fireEvent, render, screen, within } from "@testing-library/react"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { InferenceSettingsVisualization } from "~/components/labs/InferenceSettingsVisualization"; + +function getCard(name: string) { + const card = screen.getByRole("heading", { name }).closest("article"); + expect(card).not.toBeNull(); + return card as HTMLElement; +} + +function getCandidateRow(card: HTMLElement, token: string) { + const row = Array.from( + card.querySelectorAll(".inference-settings-viz__row"), + ).find((candidateRow) => candidateRow.textContent?.includes(token)); + expect(row).not.toBeNull(); + return row as HTMLElement; +} + +describe("InferenceSettingsVisualization", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("renders three separate samplers with the shared fox prompt", () => { + render(); + + expect( + screen.getByRole("heading", { + name: "See inference filters reshape the next-token choice", + }), + ).toBeInTheDocument(); + expect( + screen.getByRole("heading", { name: "Temperature" }), + ).toBeInTheDocument(); + expect(screen.getByRole("heading", { name: "Top K" })).toBeInTheDocument(); + expect( + screen.getByRole("heading", { name: "Top P / Min P" }), + ).toBeInTheDocument(); + expect( + screen.getAllByText("The quick brown fox").length, + ).toBeGreaterThanOrEqual(3); + expect(getCard("Top P / Min P")).toHaveClass( + "inference-settings-viz__card--wide", + ); + }); + + it("updates the temperature distribution when the slider changes", () => { + render(); + + const card = getCard("Temperature"); + const jumpsRow = getCandidateRow(card, "jumps"); + const initialText = jumpsRow.textContent; + + fireEvent.change(within(card).getByLabelText("Temperature"), { + target: { value: "2" }, + }); + + expect(jumpsRow.textContent).not.toBe(initialText); + }); + + it("excludes lower-ranked candidates from Top K sampling", () => { + vi.spyOn(Math, "random").mockReturnValue(0.99); + render(); + + const card = getCard("Top K"); + fireEvent.change(within(card).getByLabelText("Top K"), { + target: { value: "1" }, + }); + + expect(getCandidateRow(card, "jumps")).toHaveTextContent("Included"); + expect(getCandidateRow(card, "leaps")).toHaveTextContent("Excluded"); + + fireEvent.click( + within(card).getByRole("button", { name: "Sample Next Token" }), + ); + expect( + within(card).getByText("The quick brown fox jumps"), + ).toBeInTheDocument(); + }); + + it("toggles Top P into Min P mode and applies the relative probability floor", () => { + render(); + + const card = getCard("Top P / Min P"); + + expect(within(card).getByText("Top P threshold math")).toBeInTheDocument(); + expect(within(card).getByText("Target P")).toBeInTheDocument(); + expect( + within(card).getByLabelText("Top P cumulative probability strip"), + ).toBeInTheDocument(); + + fireEvent.click(within(card).getByRole("button", { name: "Min P" })); + + const minPSlider = within(card).getByLabelText("Min P"); + expect(minPSlider).toBeInTheDocument(); + expect(within(card).getByText("Min P threshold math")).toBeInTheDocument(); + expect( + within(card).getByLabelText("Min P raw probability cutoff bars"), + ).toBeInTheDocument(); + + fireEvent.change(minPSlider, { + target: { value: "0.2" }, + }); + + expect(getCandidateRow(card, "hops")).toHaveTextContent("Included"); + expect(getCandidateRow(card, "darts")).toHaveTextContent("Excluded"); + }); + + it("samples and resets a card sequence", () => { + vi.spyOn(Math, "random").mockReturnValue(0); + render(); + + const card = getCard("Temperature"); + fireEvent.click( + within(card).getByRole("button", { name: "Sample Next Token" }), + ); + + expect( + within(card).getByText("The quick brown fox jumps"), + ).toBeInTheDocument(); + expect(within(card).getByText(/Sampled "jumps"/)).toBeInTheDocument(); + + fireEvent.click(within(card).getByRole("button", { name: "Reset" })); + + expect(within(card).getByText("The quick brown fox")).toBeInTheDocument(); + expect(within(card).getByText("No token sampled yet")).toBeInTheDocument(); + }); +}); diff --git a/src/components/labs/InferenceSettingsVisualization.tsx b/src/components/labs/InferenceSettingsVisualization.tsx new file mode 100644 index 0000000..c60c28c --- /dev/null +++ b/src/components/labs/InferenceSettingsVisualization.tsx @@ -0,0 +1,642 @@ +"use client"; + +import { useMemo, useState } from "react"; + +type Candidate = { + token: string; + raw: number; +}; + +type ProcessedCandidate = Candidate & { + included: boolean; + samplingProb: number; +}; + +type CumulativeCandidate = Candidate & { + cumulativeEnd: number; + cumulativeStart: number; + included: boolean; +}; + +type SamplerKind = "temperature" | "top-k" | "top-p"; +type NucleusMode = "top-p" | "min-p"; + +const INITIAL_PROMPT = "The quick brown fox"; +const BAR_COLORS = [ + "#0b72ba", + "#0f766e", + "#b77400", + "#7c3aed", + "#be123c", + "#4f46e5", + "#15803d", + "#a16207", + "#0e7490", + "#9333ea", +] as const; + +const CANDIDATE_SETS: Record = { + [INITIAL_PROMPT]: [ + { token: " jumps", raw: 0.34 }, + { token: " leaps", raw: 0.16 }, + { token: " runs", raw: 0.12 }, + { token: " bounds", raw: 0.1 }, + { token: " hops", raw: 0.08 }, + { token: " darts", raw: 0.06 }, + { token: " sneaks", raw: 0.05 }, + { token: " watches", raw: 0.04 }, + { token: " sleeps", raw: 0.03 }, + { token: " ignores", raw: 0.02 }, + ], + [`${INITIAL_PROMPT} jumps`]: [ + { token: " over", raw: 0.48 }, + { token: " across", raw: 0.16 }, + { token: " past", raw: 0.12 }, + { token: " toward", raw: 0.08 }, + { token: " beside", raw: 0.06 }, + { token: " near", raw: 0.04 }, + { token: " under", raw: 0.03 }, + { token: " through", raw: 0.03 }, + ], + [`${INITIAL_PROMPT} jumps over`]: [ + { token: " the", raw: 0.64 }, + { token: " a", raw: 0.14 }, + { token: " one", raw: 0.06 }, + { token: " every", raw: 0.05 }, + { token: " that", raw: 0.04 }, + { token: " another", raw: 0.03 }, + { token: " this", raw: 0.02 }, + { token: " each", raw: 0.02 }, + ], + [`${INITIAL_PROMPT} jumps over the`]: [ + { token: " lazy", raw: 0.46 }, + { token: " sleepy", raw: 0.14 }, + { token: " old", raw: 0.1 }, + { token: " tired", raw: 0.09 }, + { token: " quiet", raw: 0.07 }, + { token: " brown", raw: 0.05 }, + { token: " startled", raw: 0.05 }, + { token: " patient", raw: 0.04 }, + ], + [`${INITIAL_PROMPT} jumps over the lazy`]: [ + { token: " dog", raw: 0.68 }, + { token: " hound", raw: 0.1 }, + { token: " pup", raw: 0.07 }, + { token: " cat", raw: 0.05 }, + { token: " animal", raw: 0.04 }, + { token: " spaniel", raw: 0.03 }, + { token: " retriever", raw: 0.02 }, + { token: " watchdog", raw: 0.01 }, + ], +}; + +const FALLBACK_CANDIDATES: Candidate[] = [ + { token: ".", raw: 0.28 }, + { token: " and", raw: 0.18 }, + { token: " while", raw: 0.12 }, + { token: " before", raw: 0.1 }, + { token: " near", raw: 0.09 }, + { token: " again", raw: 0.08 }, + { token: ",", raw: 0.08 }, + { token: " quickly", raw: 0.07 }, +]; + +function normalize(candidates: Candidate[]): Candidate[] { + const sum = candidates.reduce((total, candidate) => total + candidate.raw, 0); + if (sum <= 0) return candidates; + return candidates.map((candidate) => ({ + ...candidate, + raw: candidate.raw / sum, + })); +} + +function getCandidates(sequence: string) { + return normalize(CANDIDATE_SETS[sequence] ?? FALLBACK_CANDIDATES); +} + +function renormalizeIncluded( + candidates: Array, +): ProcessedCandidate[] { + const includedSum = candidates.reduce((total, candidate) => { + return candidate.included ? total + candidate.score : total; + }, 0); + + return candidates.map(({ score: _score, ...candidate }) => ({ + ...candidate, + samplingProb: + candidate.included && includedSum > 0 ? _score / includedSum : 0, + })); +} + +function applyTemperature( + candidates: Candidate[], + temperature: number, +): ProcessedCandidate[] { + const logits = candidates.map( + (candidate) => Math.log(candidate.raw) / temperature, + ); + const maxLogit = Math.max(...logits); + const exps = logits.map((logit) => Math.exp(logit - maxLogit)); + const sum = exps.reduce((total, value) => total + value, 0); + + return candidates.map((candidate, index) => ({ + ...candidate, + included: true, + samplingProb: exps[index] ? exps[index] / sum : 0, + })); +} + +function applyTopK( + candidates: Candidate[], + topK: number, +): ProcessedCandidate[] { + const includedTokens = new Set( + [...candidates] + .sort((left, right) => right.raw - left.raw) + .slice(0, topK) + .map((candidate) => candidate.token), + ); + + return renormalizeIncluded( + candidates.map((candidate) => ({ + ...candidate, + included: includedTokens.has(candidate.token), + score: candidate.raw, + })), + ); +} + +function applyTopP( + candidates: Candidate[], + topP: number, +): ProcessedCandidate[] { + const sortedCandidates = [...candidates].sort( + (left, right) => right.raw - left.raw, + ); + const includedTokens = new Set(); + let cumulativeProbability = 0; + + for (const candidate of sortedCandidates) { + includedTokens.add(candidate.token); + cumulativeProbability += candidate.raw; + if (cumulativeProbability >= topP) break; + } + + return renormalizeIncluded( + candidates.map((candidate) => ({ + ...candidate, + included: includedTokens.has(candidate.token), + score: candidate.raw, + })), + ); +} + +function applyMinP( + candidates: Candidate[], + minP: number, +): ProcessedCandidate[] { + const highestProbability = Math.max( + ...candidates.map((candidate) => candidate.raw), + ); + const threshold = highestProbability * minP; + + return renormalizeIncluded( + candidates.map((candidate) => ({ + ...candidate, + included: candidate.raw >= threshold, + score: candidate.raw, + })), + ); +} + +function sampleCandidate(candidates: ProcessedCandidate[]) { + const includedCandidates = candidates.filter( + (candidate) => candidate.included, + ); + if (includedCandidates.length === 0) return candidates[0] ?? null; + + let cursor = Math.random(); + for (const candidate of includedCandidates) { + cursor -= candidate.samplingProb; + if (cursor <= 0) return candidate; + } + + return includedCandidates[includedCandidates.length - 1] ?? null; +} + +function formatPercent(value: number) { + return `${(value * 100).toFixed(1)}%`; +} + +function getSortedCandidates(candidates: Candidate[]) { + return [...candidates].sort((left, right) => right.raw - left.raw); +} + +function getCumulativeCandidates( + candidates: Candidate[], + processedCandidates: ProcessedCandidate[], +): CumulativeCandidate[] { + const includedTokens = new Set( + processedCandidates + .filter((candidate) => candidate.included) + .map((candidate) => candidate.token), + ); + let cumulativeProbability = 0; + + return getSortedCandidates(candidates).map((candidate) => { + const cumulativeStart = cumulativeProbability; + cumulativeProbability += candidate.raw; + return { + ...candidate, + cumulativeEnd: cumulativeProbability, + cumulativeStart, + included: includedTokens.has(candidate.token), + }; + }); +} + +function getIncludedRawSum(processedCandidates: ProcessedCandidate[]) { + return processedCandidates.reduce((total, candidate) => { + return candidate.included ? total + candidate.raw : total; + }, 0); +} + +function NucleusThresholdVisual({ + candidates, + minP, + mode, + processedCandidates, + topP, +}: { + candidates: Candidate[]; + minP: number; + mode: NucleusMode; + processedCandidates: ProcessedCandidate[]; + topP: number; +}) { + if (mode === "top-p") { + const cumulativeCandidates = getCumulativeCandidates( + candidates, + processedCandidates, + ); + const includedSum = getIncludedRawSum(processedCandidates); + const includedTokens = cumulativeCandidates.filter( + (candidate) => candidate.included, + ); + + return ( +
+
+ Top P threshold math + + Keep adding highest-probability tokens until cumulative probability + reaches {topP.toFixed(2)}. + +
+
+ Target P + {formatPercent(topP)} + Included mass + {formatPercent(includedSum)} +
+
+ {cumulativeCandidates.map((candidate, index) => ( + + {candidate.raw >= 0.08 ? candidate.token.trim() : ""} + + ))} + + P + +
+

+ Included prefix:{" "} + + {includedTokens + .map((candidate) => candidate.token.trim()) + .join(" + ")} + + . The last included token can push the total past the target because + tokens are discrete choices. +

+
+ ); + } + + const sortedCandidates = getSortedCandidates(candidates); + const maxProbability = sortedCandidates[0]?.raw ?? 0; + const threshold = maxProbability * minP; + + return ( +
+
+ Min P threshold math + + Keep tokens whose probability is at least{" "} + min_p x strongest token. + +
+
+ Strongest token + {formatPercent(maxProbability)} + Cutoff + + {formatPercent(maxProbability)} x {minP.toFixed(2)} ={" "} + {formatPercent(threshold)} + +
+
+ {sortedCandidates.map((candidate, index) => { + const included = candidate.raw >= threshold; + return ( +
+ {candidate.token.trim()} +
+
0 ? (candidate.raw / maxProbability) * 100 : 0}%`, + }} + /> + +
+ {formatPercent(candidate.raw)} +
+ ); + })} +
+

+ The vertical marker is the minimum allowed fraction of the strongest + token. Bars that do not reach it are removed before sampling. +

+
+ ); +} + +type SamplerCardProps = { + description: string; + kind: SamplerKind; + title: string; +}; + +function SamplerCard({ description, kind, title }: SamplerCardProps) { + const [sequence, setSequence] = useState(INITIAL_PROMPT); + const [sampledMessage, setSampledMessage] = useState(""); + const [temperature, setTemperature] = useState(0.8); + const [topK, setTopK] = useState(5); + const [topP, setTopP] = useState(0.9); + const [minP, setMinP] = useState(0.05); + const [nucleusMode, setNucleusMode] = useState("top-p"); + + const candidates = useMemo(() => getCandidates(sequence), [sequence]); + const processedCandidates = useMemo(() => { + if (kind === "temperature") + return applyTemperature(candidates, temperature); + if (kind === "top-k") return applyTopK(candidates, topK); + if (nucleusMode === "min-p") return applyMinP(candidates, minP); + return applyTopP(candidates, topP); + }, [candidates, kind, minP, nucleusMode, temperature, topK, topP]); + + const sampleNextToken = () => { + const selectedCandidate = sampleCandidate(processedCandidates); + if (!selectedCandidate) return; + + setSequence( + (currentSequence) => `${currentSequence}${selectedCandidate.token}`, + ); + setSampledMessage( + `Sampled "${selectedCandidate.token.trim()}" (${formatPercent( + selectedCandidate.samplingProb, + )})`, + ); + }; + + const resetSampler = () => { + setSequence(INITIAL_PROMPT); + setSampledMessage(""); + }; + + return ( +
+
+

{title}

+

{description}

+
+ +
+ {sequence} +
+ + {kind === "temperature" ? ( + + ) : null} + + {kind === "top-k" ? ( + + ) : null} + + {kind === "top-p" ? ( +
+
+ + +
+ {nucleusMode === "top-p" ? ( + + ) : ( + + )} +
+ ) : null} + + {kind === "top-p" ? ( + + ) : null} + +
+ {processedCandidates.map((candidate, index) => ( +
+ + {candidate.token.trim() || candidate.token} + +
+
+ {formatPercent(candidate.samplingProb)} +
+
+ + {candidate.included ? "Included" : "Excluded"} + +
+ ))} +
+ +
+ + + + {sampledMessage || "No token sampled yet"} + +
+
+ ); +} + +export function InferenceSettingsVisualization() { + return ( +
+
+

+ Objective 3 Lab Widget +

+

See inference filters reshape the next-token choice

+

+ Each card starts with {INITIAL_PROMPT}. Adjust one + setting, compare the candidate bars, then sample the next token. +

+
+ +
+ + + +
+
+ ); +} diff --git a/src/components/labs/LabContent.test.tsx b/src/components/labs/LabContent.test.tsx index d842133..44a9f42 100644 --- a/src/components/labs/LabContent.test.tsx +++ b/src/components/labs/LabContent.test.tsx @@ -56,6 +56,30 @@ describe("LabContent", () => { ).toBeInTheDocument(); }); + it("renders the Lab 4 inference visualization token into an interactive component", async () => { + mockRuntimeConfig(); + + render( + , + ); + + expect( + screen.getByRole("heading", { + name: "See inference filters reshape the next-token choice", + }), + ).toBeInTheDocument(); + expect( + screen.getByRole("heading", { name: "Temperature" }), + ).toBeInTheDocument(); + expect(screen.getByRole("heading", { name: "Top K" })).toBeInTheDocument(); + expect( + screen.getByRole("heading", { name: "Top P / Min P" }), + ).toBeInTheDocument(); + }); + it("filters harness branches from a single Objective 2 selector", async () => { mockRuntimeConfig(); diff --git a/src/components/labs/LabContent.tsx b/src/components/labs/LabContent.tsx index 193ee2e..421847d 100644 --- a/src/components/labs/LabContent.tsx +++ b/src/components/labs/LabContent.tsx @@ -12,6 +12,7 @@ import { Lab1ConfidenceChat } from "~/components/labs/Lab1ConfidenceChat"; import { Lab1NetronPanel } from "~/components/labs/Lab1NetronPanel"; import { Lab3TerminalFrame } from "~/components/labs/Lab3TerminalFrame"; import { Lab8Chat } from "~/components/labs/Lab8Chat"; +import { InferenceSettingsVisualization } from "~/components/labs/InferenceSettingsVisualization"; import { Objective5Chat } from "~/components/labs/Objective5Chat"; import { QuantizationGridExplorer } from "~/components/labs/QuantizationGridExplorer"; import { QuantizationExplorer } from "~/components/labs/QuantizationExplorer"; @@ -62,6 +63,8 @@ const lab3TerminalToken = "
"; const lab1ConfidenceToken = "
"; const lab1NetronToken = "
"; const tokenizerPlaygroundToken = "
"; +const inferenceSettingsVisualizationToken = + "
"; const serviceTokenPattern = /\{\{service-(url|address):([a-z0-9-]+)(?::([^}]+))?\}\}/g; const serviceLabels: Record = { @@ -461,7 +464,7 @@ const LabContentArticle = memo(function LabContentArticle({ const renderedContent = html .split( new RegExp( - `(${escapeRegex(quantizationExplorerToken)}|${escapeRegex(quantizationGridExplorerToken)}|${escapeRegex(objective5ChatToken)}|${escapeRegex(lab8ChatToken)}|${escapeRegex(lab3TerminalToken)}|${escapeRegex(lab1ConfidenceToken)}|${escapeRegex(lab1NetronToken)}|${escapeRegex(tokenizerPlaygroundToken)})`, + `(${escapeRegex(quantizationExplorerToken)}|${escapeRegex(quantizationGridExplorerToken)}|${escapeRegex(objective5ChatToken)}|${escapeRegex(lab8ChatToken)}|${escapeRegex(lab3TerminalToken)}|${escapeRegex(lab1ConfidenceToken)}|${escapeRegex(lab1NetronToken)}|${escapeRegex(tokenizerPlaygroundToken)}|${escapeRegex(inferenceSettingsVisualizationToken)})`, "g", ), ) @@ -505,6 +508,14 @@ const LabContentArticle = memo(function LabContentArticle({ ); } + if (part === inferenceSettingsVisualizationToken) { + return ( + + ); + } + return (
diff --git a/src/styles/globals.css b/src/styles/globals.css index e871199..4dfddeb 100644 --- a/src/styles/globals.css +++ b/src/styles/globals.css @@ -956,6 +956,511 @@ ol { margin: 1.25rem 0 1.5rem; } +.lab-content [data-inference-settings-visualization] { + margin: 1.25rem 0 1.5rem; +} + +.inference-settings-viz { + margin: 1.25rem 0 1.5rem; + border: 1px solid #d7e4ef; + border-radius: 16px; + background: linear-gradient(180deg, #fbfdff, #f4f9fd); + padding: 1rem; +} + +.inference-settings-viz code { + font-family: + ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", + "Courier New", monospace; +} + +.inference-settings-viz__header { + margin-bottom: 1rem; +} + +.inference-settings-viz__eyebrow { + margin: 0; + color: #9a5f00; + font-size: 0.72rem; + font-weight: 800; + letter-spacing: 0.08em; + text-transform: uppercase; +} + +.inference-settings-viz__header h3 { + margin: 0.1rem 0 0; + color: #0f3d58; + font-size: 1.2rem; +} + +.inference-settings-viz__header p:not(.inference-settings-viz__eyebrow) { + margin: 0.55rem 0 0; + color: #334155; +} + +.inference-settings-viz__grid { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 0.9rem; + align-items: start; +} + +.inference-settings-viz__card { + display: flex; + flex-direction: column; + min-width: 0; + min-height: 100%; + border: 1px solid #dce6ee; + border-radius: 14px; + background: rgba(255, 255, 255, 0.92); + padding: 0.9rem; +} + +.inference-settings-viz__card--wide { + grid-column: 1 / -1; + width: 100%; +} + +.inference-settings-viz__card--wide > * { + width: 100%; +} + +.inference-settings-viz__card-header h4 { + margin: 0; + color: #0f3d58; + font-size: 1.05rem; + line-height: 1.35; +} + +.inference-settings-viz__card-header p { + margin: 0.35rem 0 0; + color: #475569; + font-size: 0.92rem; + line-height: 1.42; +} + +.inference-settings-viz__sequence { + margin: 0.8rem 0; + padding: 0.7rem 0.75rem; + border: 1px solid #d6e2ed; + border-radius: 10px; + background: #f7fbff; + color: #12364e; + font-family: + ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", + "Courier New", monospace; + font-size: 0.92rem; + line-height: 1.35; + min-height: 2.85rem; +} + +.inference-settings-viz__control { + --slider-thumb-size: 1rem; + --slider-thumb-offset: calc(var(--slider-thumb-size) / 2); + display: block; + margin-bottom: 0.85rem; +} + +.inference-settings-viz__control > span { + display: flex; + justify-content: space-between; + gap: 0.75rem; + color: #334155; + font-size: 0.86rem; + font-weight: 700; +} + +.inference-settings-viz__control strong { + color: #0b72ba; + font-family: + ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", + "Courier New", monospace; +} + +.inference-settings-viz__control input[type="range"] { + -webkit-appearance: none; + appearance: none; + display: block; + width: calc(100% - var(--slider-thumb-size)); + margin-left: var(--slider-thumb-offset); + margin-right: var(--slider-thumb-offset); + margin-top: 0.55rem; + background: transparent; +} + +.inference-settings-viz__control + input[type="range"]::-webkit-slider-runnable-track { + height: 0.68rem; + border-radius: 999px; + background: linear-gradient(180deg, #dbe7f2, #d4e1ec); +} + +.inference-settings-viz__control input[type="range"]::-webkit-slider-thumb { + -webkit-appearance: none; + appearance: none; + width: var(--slider-thumb-size); + height: var(--slider-thumb-size); + margin-top: calc((0.68rem - var(--slider-thumb-size)) / 2); + border: 1px solid #c8d6e3; + border-radius: 999px; + background: linear-gradient(180deg, #ffffff, #eef3f8); + box-shadow: 0 1px 4px rgba(15, 23, 42, 0.18); +} + +.inference-settings-viz__control input[type="range"]::-moz-range-track { + height: 0.68rem; + border: none; + border-radius: 999px; + background: linear-gradient(180deg, #dbe7f2, #d4e1ec); +} + +.inference-settings-viz__control input[type="range"]::-moz-range-thumb { + width: var(--slider-thumb-size); + height: var(--slider-thumb-size); + border: 1px solid #c8d6e3; + border-radius: 999px; + background: linear-gradient(180deg, #ffffff, #eef3f8); + box-shadow: 0 1px 4px rgba(15, 23, 42, 0.18); +} + +.inference-settings-viz__nucleus-controls { + margin-bottom: 0.85rem; +} + +.inference-settings-viz__nucleus-controls .inference-settings-viz__control { + margin-bottom: 0; +} + +.inference-settings-viz__segmented { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 0.25rem; + margin-bottom: 0.75rem; + padding: 0.2rem; + border: 1px solid #d6e2ed; + border-radius: 10px; + background: #f7fbff; +} + +.inference-settings-viz__segmented button { + border: 1px solid transparent; + border-radius: 8px; + background: transparent; + color: #426075; + cursor: pointer; + font: inherit; + font-size: 0.84rem; + font-weight: 800; + line-height: 1; + padding: 0.5rem 0.55rem; +} + +.inference-settings-viz__segmented button[aria-pressed="true"] { + border-color: #9cc5e5; + background: #ffffff; + color: #0f4f76; + box-shadow: 0 1px 2px rgba(15, 23, 42, 0.08); +} + +.inference-settings-viz__threshold-panel { + margin-bottom: 0.95rem; + padding: 0.85rem; + border: 1px solid #d6e2ed; + border-radius: 12px; + background: #f7fbff; +} + +.inference-settings-viz__threshold-header { + display: grid; + gap: 0.25rem; + margin-bottom: 0.75rem; +} + +.inference-settings-viz__threshold-header strong { + color: #0f3d58; + font-size: 0.92rem; +} + +.inference-settings-viz__threshold-header span { + color: #475569; + font-size: 0.86rem; + line-height: 1.4; +} + +.inference-settings-viz__formula-row { + display: grid; + grid-template-columns: max-content max-content max-content minmax(0, 1fr); + align-items: center; + gap: 0.45rem 0.6rem; + margin-bottom: 0.75rem; + color: #64748b; + font-size: 0.8rem; + font-weight: 700; +} + +.inference-settings-viz__formula-row code { + color: #0f4f76; + font-size: 0.8rem; + font-weight: 800; +} + +.inference-settings-viz__cumulative-strip { + position: relative; + display: flex; + height: 2.35rem; + overflow: visible; + border: 1px solid #cbdbe8; + border-radius: 10px; + background: #e8f1f8; +} + +.inference-settings-viz__cumulative-segment { + display: flex; + align-items: center; + justify-content: center; + min-width: 0; + height: 100%; + overflow: hidden; + color: #ffffff; + font-size: 0.72rem; + font-weight: 800; + text-overflow: ellipsis; + white-space: nowrap; +} + +.inference-settings-viz__cumulative-segment:first-child { + border-radius: 9px 0 0 9px; +} + +.inference-settings-viz__cumulative-segment:nth-last-child(2) { + border-radius: 0 9px 9px 0; +} + +.inference-settings-viz__cumulative-segment[data-included="false"] { + background: #cbd5e1 !important; + color: #475569; +} + +.inference-settings-viz__threshold-marker { + position: absolute; + top: -0.42rem; + bottom: -0.42rem; + width: 2px; + transform: translateX(-1px); + background: #be123c; + color: #be123c; +} + +.inference-settings-viz__threshold-marker { + font-size: 0; +} + +.inference-settings-viz__threshold-marker::after { + content: "P threshold"; + position: absolute; + left: 50%; + bottom: calc(100% + 0.18rem); + transform: translateX(-50%); + border: 1px solid #fecdd3; + border-radius: 999px; + background: #fff1f2; + color: #9f1239; + font-size: 0.66rem; + font-weight: 800; + line-height: 1; + padding: 0.2rem 0.34rem; + white-space: nowrap; +} + +.inference-settings-viz__threshold-note { + margin: 0.7rem 0 0; + color: #475569; + font-size: 0.84rem; + line-height: 1.42; +} + +.inference-settings-viz__threshold-note strong { + color: #0f3d58; +} + +.inference-settings-viz__minp-bars { + display: grid; + gap: 0.45rem; +} + +.inference-settings-viz__minp-row { + display: grid; + grid-template-columns: 4.35rem minmax(0, 1fr) 3.8rem; + align-items: center; + gap: 0.45rem; +} + +.inference-settings-viz__minp-row > span { + overflow: hidden; + color: #334155; + font-family: + ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", + "Courier New", monospace; + font-size: 0.78rem; + font-weight: 700; + text-align: right; + text-overflow: ellipsis; + white-space: nowrap; +} + +.inference-settings-viz__minp-row > code { + color: #334155; + font-size: 0.72rem; + font-weight: 800; +} + +.inference-settings-viz__minp-track { + position: relative; + height: 1.1rem; + border: 1px solid #d8e3ed; + border-radius: 999px; + background: #edf4fa; +} + +.inference-settings-viz__minp-fill { + height: 100%; + border-radius: 999px; +} + +.inference-settings-viz__minp-marker { + position: absolute; + top: -0.28rem; + bottom: -0.28rem; + width: 2px; + transform: translateX(-1px); + background: #be123c; +} + +.inference-settings-viz__minp-row[data-included="false"] + .inference-settings-viz__minp-fill { + opacity: 0.24; +} + +.inference-settings-viz__minp-row[data-included="false"] > span, +.inference-settings-viz__minp-row[data-included="false"] > code { + color: #94a3b8; +} + +.inference-settings-viz__bars { + display: grid; + gap: 0.42rem; +} + +.inference-settings-viz__row { + display: grid; + grid-template-columns: 4.35rem minmax(0, 1fr) 4.45rem; + align-items: center; + gap: 0.45rem; +} + +.inference-settings-viz__token { + overflow: hidden; + color: #334155; + font-family: + ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", + "Courier New", monospace; + font-size: 0.78rem; + font-weight: 700; + text-align: right; + text-overflow: ellipsis; + white-space: nowrap; +} + +.inference-settings-viz__bar-track { + height: 1.4rem; + overflow: hidden; + border: 1px solid #d8e3ed; + border-radius: 6px; + background: #edf4fa; +} + +.inference-settings-viz__bar-fill { + display: flex; + align-items: center; + justify-content: flex-end; + min-width: 0; + height: 100%; + border-radius: 5px; + color: #ffffff; + transition: + opacity 0.18s ease, + width 0.24s ease; +} + +.inference-settings-viz__bar-fill span { + padding: 0 0.36rem; + font-family: + ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", + "Courier New", monospace; + font-size: 0.68rem; + font-weight: 800; +} + +.inference-settings-viz__row[data-included="false"] + .inference-settings-viz__bar-fill { + opacity: 0.22; +} + +.inference-settings-viz__row[data-included="false"] + .inference-settings-viz__token { + color: #94a3b8; +} + +.inference-settings-viz__row-state { + color: #64748b; + font-size: 0.68rem; + font-weight: 800; + letter-spacing: 0.04em; + text-transform: uppercase; +} + +.inference-settings-viz__row[data-included="true"] + .inference-settings-viz__row-state { + color: #0f766e; +} + +.inference-settings-viz__actions { + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 0.45rem; + margin-top: 0.9rem; +} + +.inference-settings-viz__actions button { + border: 1px solid #bad5e8; + border-radius: 8px; + background: #ffffff; + color: #0f4f76; + cursor: pointer; + font: inherit; + font-size: 0.82rem; + font-weight: 800; + line-height: 1; + padding: 0.55rem 0.7rem; +} + +.inference-settings-viz__actions button:first-child { + border-color: #0b72ba; + background: #0b72ba; + color: #ffffff; +} + +.inference-settings-viz__actions button:hover { + border-color: #0f4f76; +} + +.inference-settings-viz__actions span { + color: #64748b; + font-size: 0.78rem; + font-weight: 700; +} + .quantization-explorer { border: 1px solid #d7e4ef; border-radius: 16px; @@ -1920,6 +2425,23 @@ ol { grid-template-columns: repeat(2, minmax(0, 1fr)); } + .inference-settings-viz { + padding: 0.9rem; + } + + .inference-settings-viz__grid { + grid-template-columns: 1fr; + } + + .inference-settings-viz__row { + grid-template-columns: 3.75rem minmax(0, 1fr); + } + + .inference-settings-viz__row-state { + grid-column: 2; + margin-top: -0.22rem; + } + .objective5-chat__settings { grid-template-columns: 1fr; }