This commit is contained in:
2026-04-23 14:48:07 -06:00
parent f74575277a
commit 431e667c5e
9 changed files with 505 additions and 228 deletions
+15 -18
View File
@@ -1,7 +1,7 @@
--- ---
order: 2 order: 2
title: "Lab 2 - Quantization Tradeoffs: Comparing 2-bit, 4-bit, and 8-bit" title: "Lab 2 - Quantization Tradeoffs: Comparing 2-bit, 4-bit, and 8-bit"
description: Download Gemma 4 E2B in three GGUF quantizations and compare size, metadata, and output quality. description: Compare Gemma 4 E2B in three Ollama quantizations and study how lower precision changes behavior.
--- ---
<!-- breakout-style: instruction-rails --> <!-- breakout-style: instruction-rails -->
@@ -10,8 +10,8 @@ description: Download Gemma 4 E2B in three GGUF quantizations and compare size,
In this lab, we will: In this lab, we will:
- Download the same Gemma model in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0` - Pull the same Gemma model in Q2, Q4, and Q8 Ollama variants
- Compare file size and GGUF metadata across those quantizations - Compare the quantization labels and model behavior across those variants
- Observe how lower precision changes the model's behavior - Observe how lower precision changes the model's behavior
- Build intuition for when a smaller quant may or may not be worth it - Build intuition for when a smaller quant may or may not be worth it
@@ -23,19 +23,15 @@ In this lab, we will:
## Objective 1: Understand the Model and the Quantizations ## Objective 1: Understand the Model and the Quantizations
For this lab, we will use the Hugging Face repository for **Unsloth's GGUF release of Gemma 4 E2B Instruct**: For this lab, we will use three Ollama-published variants of **Gemma 4 E2B** that represent distinct precision bands:
<https://huggingface.co/unsloth/gemma-4-E2B-it-GGUF> | Precision band | Ollama model tag | Why we are using it |
| -------------- | ----------------------------------- | --------------------------------------- |
| Q2 | `cajina/gemma4_e2b-q2_k_xl:v01` | Most aggressive compression in this lab |
| Q4 | `batiai/gemma4-e2b:q4` | Common middle-ground quant |
| Q8 | `bjoernb/gemma4-e2b-fast:latest` | Highest-quality quant in this lab |
This repository currently exposes multiple GGUF variants of the same base model. We will focus on one file from each of these precision bands: Even though the Ollama tags differ, these are all variants of the same underlying Gemma 4 E2B model family. The main variable we are changing is how the weights are stored.
| Precision band | GGUF file | Why we are using it | File Size |
| -------------- | ------------------------------ | --------------------------------------- |-----------|
| 2-bit | `gemma-4-E2B-it-UD-IQ2_M.gguf` | Most aggressive compression in this lab | 2.4 GB |
| 4-bit | `gemma-4-E2B-it-Q4_K_M.gguf` | Common middle-ground quant | 3.17 GB |
| 8-bit | `gemma-4-E2B-it-Q8_0.gguf` | Highest-quality quant in this lab | 5.05 GB |
Even though the filenames differ, these are all the same underlying instruction-tuned Gemma 4 E2B model. The main variable we are changing is how the weights are stored.
When we say these files are the same model, we mean that the overall neural network is still the same: When we say these files are the same model, we mean that the overall neural network is still the same:
@@ -97,10 +93,11 @@ The viewer below zooms out from one weight and instead shows a toy layer with 16
### Explore: Compare the same prompts through the hosted chat widget ### Explore: Compare the same prompts through the hosted chat widget
If your instructor provides an OpenAI-compatible endpoint, you can compare the same prompts through the embedded chat tool below: By default, the widget below points to the courseware-managed Ollama service and the three Lab 2 model tags above. You can still switch to another endpoint if your instructor provides one.
- Paste the lab endpoint and API key into the settings row - Use the preloaded managed endpoint or replace it with another compatible endpoint
- Switch between `Q8_0`, `Q4_K_M`, and `UD-IQ2_M` - Optionally add an API key if your chosen endpoint requires one
- Switch between the configured Q2, Q4, and Q8 Gemma variants
- Re-run the same prompt so you can compare coherence, stability, and SVG output - Re-run the same prompt so you can compare coherence, stability, and SVG output
- Try a visual prompt such as `Draw a pelican riding a bicycle.` - Try a visual prompt such as `Draw a pelican riding a bicycle.`
@@ -121,4 +118,4 @@ The important takeaway is not that one quant is always "best." The important tak
## Conclusion ## Conclusion
This lab isolates quantization as the main variable. By downloading **Gemma 4 E2B Instruct** in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0`, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against disk usage and resource constraints. This lab isolates quantization as the main variable. By comparing **Gemma 4 E2B** in Q2, Q4, and Q8 Ollama variants, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against efficiency and resource constraints.
+40 -36
View File
@@ -4,9 +4,9 @@ import {
extractAssistantTextContent, extractAssistantTextContent,
extractObjective5Metrics, extractObjective5Metrics,
extractSvgMarkup, extractSvgMarkup,
getOllamaChatEndpointCandidates,
isLocalEndpoint, isLocalEndpoint,
looksLikeOllamaModel, looksLikeOllamaModel,
normalizeOllamaChatEndpoint,
normalizeUpstreamChatEndpoint, normalizeUpstreamChatEndpoint,
sanitizeSvgDocument, sanitizeSvgDocument,
type Objective5Message, type Objective5Message,
@@ -50,15 +50,6 @@ export async function POST(request: Request) {
); );
} }
if (!apiKey && !isLocalEndpoint(endpoint)) {
return NextResponse.json(
{
error: "An API key is required for remote endpoints.",
},
{ status: 400 },
);
}
if (!model) { if (!model) {
return NextResponse.json( return NextResponse.json(
{ {
@@ -79,11 +70,11 @@ export async function POST(request: Request) {
const useOllamaChat = looksLikeOllamaModel(model); const useOllamaChat = looksLikeOllamaModel(model);
const useLocalOpenAI = !useOllamaChat && isLocalEndpoint(endpoint); const useLocalOpenAI = !useOllamaChat && isLocalEndpoint(endpoint);
let upstreamUrl: string; let upstreamCandidates: string[];
try { try {
upstreamUrl = useOllamaChat upstreamCandidates = useOllamaChat
? normalizeOllamaChatEndpoint(endpoint) ? getOllamaChatEndpointCandidates(endpoint)
: normalizeUpstreamChatEndpoint(endpoint); : [normalizeUpstreamChatEndpoint(endpoint)];
} catch { } catch {
return NextResponse.json( return NextResponse.json(
{ {
@@ -98,6 +89,12 @@ export async function POST(request: Request) {
: useLocalOpenAI : useLocalOpenAI
? LOCAL_OPENAI_UPSTREAM_TIMEOUT_MS ? LOCAL_OPENAI_UPSTREAM_TIMEOUT_MS
: OPENAI_UPSTREAM_TIMEOUT_MS; : OPENAI_UPSTREAM_TIMEOUT_MS;
try {
let lastStatus = 502;
let lastMessage = "The chat request could not reach the upstream endpoint.";
for (const upstreamUrl of upstreamCandidates) {
const controller = new AbortController(); const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), upstreamTimeoutMs); const timeoutId = setTimeout(() => controller.abort(), upstreamTimeoutMs);
@@ -139,7 +136,8 @@ export async function POST(request: Request) {
} }
if (!upstreamResponse.ok) { if (!upstreamResponse.ok) {
const message = lastStatus = upstreamResponse.status;
lastMessage =
typeof parsedBody === "object" && typeof parsedBody === "object" &&
parsedBody !== null && parsedBody !== null &&
"error" in parsedBody && "error" in parsedBody &&
@@ -149,33 +147,21 @@ export async function POST(request: Request) {
typeof parsedBody.error.message === "string" typeof parsedBody.error.message === "string"
? parsedBody.error.message ? parsedBody.error.message
: `The upstream endpoint returned ${upstreamResponse.status}.`; : `The upstream endpoint returned ${upstreamResponse.status}.`;
continue;
return NextResponse.json(
{
error: message,
},
{ status: upstreamResponse.status },
);
} }
if (!parsedBody || typeof parsedBody !== "object") { if (!parsedBody || typeof parsedBody !== "object") {
return NextResponse.json( lastStatus = 502;
{ lastMessage = "The upstream endpoint returned an unreadable response.";
error: "The upstream endpoint returned an unreadable response.", continue;
},
{ status: 502 },
);
} }
const content = extractAssistantTextContent(parsedBody); const content = extractAssistantTextContent(parsedBody);
const metrics = extractObjective5Metrics(parsedBody); const metrics = extractObjective5Metrics(parsedBody);
if (!content) { if (!content) {
return NextResponse.json( lastStatus = 502;
{ lastMessage = "The upstream endpoint returned no assistant content.";
error: "The upstream endpoint returned no assistant content.", continue;
},
{ status: 502 },
);
} }
const svgMarkup = extractSvgMarkup(content); const svgMarkup = extractSvgMarkup(content);
@@ -206,6 +192,26 @@ export async function POST(request: Request) {
role: "assistant", role: "assistant",
svg: sanitizedSvg.svg, svg: sanitizedSvg.svg,
}); });
} catch (caughtError) {
if (caughtError instanceof Error && caughtError.name === "AbortError") {
lastStatus = 504;
lastMessage = `The upstream endpoint timed out after ${Math.floor(upstreamTimeoutMs / 1000)} seconds.`;
continue;
}
lastStatus = 502;
lastMessage = "The chat request could not reach the upstream endpoint.";
} finally {
clearTimeout(timeoutId);
}
}
return NextResponse.json(
{
error: lastMessage,
},
{ status: lastStatus },
);
} catch (caughtError) { } catch (caughtError) {
if (caughtError instanceof Error && caughtError.name === "AbortError") { if (caughtError instanceof Error && caughtError.name === "AbortError") {
return NextResponse.json( return NextResponse.json(
@@ -222,7 +228,5 @@ export async function POST(request: Request) {
}, },
{ status: 502 }, { status: 502 },
); );
} finally {
clearTimeout(timeoutId);
} }
} }
-10
View File
@@ -3,7 +3,6 @@ import {
extractModelOptions, extractModelOptions,
getDefaultObjective5ModelOptions, getDefaultObjective5ModelOptions,
getModelListEndpointCandidates, getModelListEndpointCandidates,
isLocalEndpoint,
} from "~/lib/lab2-chat"; } from "~/lib/lab2-chat";
type ModelsRouteRequestBody = { type ModelsRouteRequestBody = {
@@ -39,15 +38,6 @@ export async function POST(request: Request) {
); );
} }
if (!apiKey && !isLocalEndpoint(endpoint)) {
return NextResponse.json(
{
error: "An API key is required for remote endpoints.",
},
{ status: 400 },
);
}
let candidates: string[]; let candidates: string[];
try { try {
candidates = getModelListEndpointCandidates(endpoint); candidates = getModelListEndpointCandidates(endpoint);
+7
View File
@@ -13,6 +13,13 @@ describe("LabContent", () => {
new Response( new Response(
JSON.stringify({ JSON.stringify({
lab1NetronUrl: "http://127.0.0.1:8338", lab1NetronUrl: "http://127.0.0.1:8338",
lab2OllamaModels: [
{
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
],
lab2OllamaUrl: "http://127.0.0.1:11434",
lab3TerminalUrl: "http://127.0.0.1:7681/wetty", lab3TerminalUrl: "http://127.0.0.1:7681/wetty",
}), }),
{ status: 200 }, { status: 200 },
+71 -9
View File
@@ -16,11 +16,45 @@ describe("Objective5Chat", () => {
const fetchMock = vi.fn(async (input: RequestInfo | URL) => { const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
const url = String(input); const url = String(input);
if (url === "/courseware-runtime.json") {
return new Response(
JSON.stringify({
lab2OllamaModels: [
{
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
{
label: "Gemma 4 E2B Q4",
value: "batiai/gemma4-e2b:q4",
},
{
label: "Gemma 4 E2B Q8",
value: "bjoernb/gemma4-e2b-fast:latest",
},
],
lab2OllamaUrl: "http://127.0.0.1:11434",
}),
{ status: 200 },
);
}
if (url === "/api/lab2/models") { if (url === "/api/lab2/models") {
return { return {
json: async () => ({ json: async () => ({
models: [ models: [
{ label: "LM Studio Qwen", value: "qwen3.5-9b-mlx" }, {
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
{
label: "Gemma 4 E2B Q4",
value: "batiai/gemma4-e2b:q4",
},
{
label: "Gemma 4 E2B Q8",
value: "bjoernb/gemma4-e2b-fast:latest",
},
{ label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE }, { label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE },
], ],
}), }),
@@ -74,8 +108,12 @@ describe("Objective5Chat", () => {
}); });
it("persists settings updates back to localStorage", async () => { it("persists settings updates back to localStorage", async () => {
mockFetch();
render(<Objective5Chat />); render(<Objective5Chat />);
await screen.findByLabelText("Endpoint");
fireEvent.change(screen.getByLabelText("Endpoint"), { fireEvent.change(screen.getByLabelText("Endpoint"), {
target: { value: "https://saved.example/api" }, target: { value: "https://saved.example/api" },
}); });
@@ -95,7 +133,9 @@ describe("Objective5Chat", () => {
target: { value: "http://127.0.0.1:1234" }, target: { value: "http://127.0.0.1:1234" },
}); });
expect(await screen.findByRole("option", { name: "LM Studio Qwen" })).toBeInTheDocument(); expect(
await screen.findByRole("option", { name: "Gemma 4 E2B Q4" }),
).toBeInTheDocument();
expect(fetchMock).toHaveBeenCalledWith( expect(fetchMock).toHaveBeenCalledWith(
"/api/lab2/models", "/api/lab2/models",
expect.objectContaining({ expect.objectContaining({
@@ -109,9 +149,6 @@ describe("Objective5Chat", () => {
render(<Objective5Chat />); render(<Objective5Chat />);
fireEvent.change(screen.getByLabelText("API key"), {
target: { value: "sk-test" },
});
fireEvent.change(screen.getByLabelText("Prompt"), { fireEvent.change(screen.getByLabelText("Prompt"), {
target: { value: "Compare these quantized models." }, target: { value: "Compare these quantized models." },
}); });
@@ -129,11 +166,26 @@ describe("Objective5Chat", () => {
vi.fn(async (input: RequestInfo | URL) => { vi.fn(async (input: RequestInfo | URL) => {
const url = String(input); const url = String(input);
if (url === "/courseware-runtime.json") {
return new Response(
JSON.stringify({
lab2OllamaModels: [
{
label: "Gemma 4 E2B Q4",
value: "batiai/gemma4-e2b:q4",
},
],
lab2OllamaUrl: "http://127.0.0.1:11434",
}),
{ status: 200 },
);
}
if (url === "/api/lab2/models") { if (url === "/api/lab2/models") {
return { return {
json: async () => ({ json: async () => ({
models: [ models: [
{ label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" }, { label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" },
{ label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE }, { label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE },
], ],
}), }),
@@ -160,9 +212,6 @@ describe("Objective5Chat", () => {
render(<Objective5Chat />); render(<Objective5Chat />);
fireEvent.change(screen.getByLabelText("API key"), {
target: { value: "sk-test" },
});
fireEvent.change(screen.getByLabelText("Prompt"), { fireEvent.change(screen.getByLabelText("Prompt"), {
target: { value: "Draw a pelican riding a bicycle." }, target: { value: "Draw a pelican riding a bicycle." },
}); });
@@ -174,4 +223,17 @@ describe("Objective5Chat", () => {
expect(screen.getByText("View SVG source")).toBeInTheDocument(); expect(screen.getByText("View SVG source")).toBeInTheDocument();
expect(screen.getByText("Tokens/sec 14.4")).toBeInTheDocument(); expect(screen.getByText("Tokens/sec 14.4")).toBeInTheDocument();
}); });
it("defaults to the managed Ollama runtime endpoint and models", async () => {
mockFetch();
render(<Objective5Chat />);
expect(await screen.findByLabelText("Endpoint")).toHaveValue(
"http://localhost:11434/",
);
expect(screen.getByLabelText("Model")).toHaveValue(
"cajina/gemma4_e2b-q2_k_xl:v01",
);
});
}); });
+73 -20
View File
@@ -1,14 +1,18 @@
"use client"; "use client";
import { FormEvent, useCallback, useEffect, useMemo, useState } from "react"; import { FormEvent, useCallback, useEffect, useMemo, useState } from "react";
import {
fetchCoursewareRuntimeConfig,
normalizeCoursewareRuntimeConfig,
} from "~/lib/courseware-runtime";
import { import {
getActiveModel, getActiveModel,
getDefaultObjective5ModelOptions, getDefaultObjective5ModelOptions,
getDefaultObjective5Settings, getDefaultObjective5Settings,
isLocalEndpoint,
LAB2_CHAT_STORAGE_KEY, LAB2_CHAT_STORAGE_KEY,
LAB2_CUSTOM_MODEL_VALUE, LAB2_CUSTOM_MODEL_VALUE,
LAB2_DEFAULT_ENDPOINT, LAB2_DEFAULT_ENDPOINT,
LAB2_LEGACY_DEFAULT_ENDPOINT,
type Objective5ModelOption, type Objective5ModelOption,
type Objective5Metrics, type Objective5Metrics,
type Objective5Message, type Objective5Message,
@@ -93,8 +97,21 @@ function toApiConversation(messages: ChatTurn[]) {
} }
export function Objective5Chat() { export function Objective5Chat() {
const defaults = useMemo(() => getDefaultObjective5Settings(), []); const runtimeDefaults = useMemo(() => normalizeCoursewareRuntimeConfig(), []);
const defaultModelOptions = useMemo(() => getDefaultObjective5ModelOptions(), []); const defaults = useMemo(
() => ({
...getDefaultObjective5Settings(),
endpoint: runtimeDefaults.lab2OllamaUrl,
selectedModel:
runtimeDefaults.lab2OllamaModels[0]?.value ??
getDefaultObjective5Settings().selectedModel,
}),
[runtimeDefaults.lab2OllamaModels, runtimeDefaults.lab2OllamaUrl],
);
const defaultModelOptions = useMemo(
() => ensureCustomOption(runtimeDefaults.lab2OllamaModels),
[runtimeDefaults.lab2OllamaModels],
);
const [endpoint, setEndpoint] = useState(defaults.endpoint); const [endpoint, setEndpoint] = useState(defaults.endpoint);
const [apiKey, setApiKey] = useState(defaults.apiKey); const [apiKey, setApiKey] = useState(defaults.apiKey);
const [selectedModel, setSelectedModel] = useState(defaults.selectedModel); const [selectedModel, setSelectedModel] = useState(defaults.selectedModel);
@@ -112,23 +129,70 @@ export function Objective5Chat() {
const activeModel = getActiveModel(selectedModel, customModel); const activeModel = getActiveModel(selectedModel, customModel);
useEffect(() => { useEffect(() => {
let isCancelled = false;
async function loadInitialSettings() {
const resolvedRuntime = await fetchCoursewareRuntimeConfig().catch(() =>
normalizeCoursewareRuntimeConfig(),
);
if (isCancelled) {
return;
}
const resolvedModelOptions = ensureCustomOption(
resolvedRuntime.lab2OllamaModels,
);
const resolvedSelectedModel =
resolvedRuntime.lab2OllamaModels[0]?.value ?? defaults.selectedModel;
setModelOptions(resolvedModelOptions);
try { try {
const savedSettings = window.localStorage.getItem(LAB2_CHAT_STORAGE_KEY); const savedSettings = window.localStorage.getItem(LAB2_CHAT_STORAGE_KEY);
if (!savedSettings) { if (!savedSettings) {
setEndpoint(resolvedRuntime.lab2OllamaUrl);
setSelectedModel(resolvedSelectedModel);
setCustomModel("");
setApiKey("");
setHasLoadedSettings(true); setHasLoadedSettings(true);
return; return;
} }
const parsed = JSON.parse(savedSettings) as Partial<typeof defaults>; const parsed = JSON.parse(savedSettings) as Partial<typeof defaults>;
setEndpoint(parsed.endpoint?.trim() || LAB2_DEFAULT_ENDPOINT); const savedEndpoint = parsed.endpoint?.trim();
const nextEndpoint =
!savedEndpoint || savedEndpoint === LAB2_LEGACY_DEFAULT_ENDPOINT
? resolvedRuntime.lab2OllamaUrl
: savedEndpoint;
const savedSelectedModel = parsed.selectedModel?.trim();
const nextSelectedModel =
savedSelectedModel === LAB2_CUSTOM_MODEL_VALUE ||
resolvedModelOptions.some(
(option) => option.value === savedSelectedModel,
)
? savedSelectedModel || resolvedSelectedModel
: resolvedSelectedModel;
setEndpoint(nextEndpoint);
setApiKey(parsed.apiKey ?? ""); setApiKey(parsed.apiKey ?? "");
setSelectedModel(parsed.selectedModel?.trim() || defaults.selectedModel); setSelectedModel(nextSelectedModel);
setCustomModel(parsed.customModel?.trim() || ""); setCustomModel(parsed.customModel?.trim() || "");
} catch { } catch {
window.localStorage.removeItem(LAB2_CHAT_STORAGE_KEY); window.localStorage.removeItem(LAB2_CHAT_STORAGE_KEY);
setEndpoint(resolvedRuntime.lab2OllamaUrl);
setSelectedModel(resolvedSelectedModel);
setCustomModel("");
setApiKey("");
} finally { } finally {
setHasLoadedSettings(true); setHasLoadedSettings(true);
} }
}
void loadInitialSettings();
return () => {
isCancelled = true;
};
}, [defaults.selectedModel]); }, [defaults.selectedModel]);
useEffect(() => { useEffect(() => {
@@ -154,11 +218,6 @@ export function Objective5Chat() {
return; return;
} }
if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) {
setModelError("Enter an API key before refreshing remote models.");
return;
}
setIsRefreshingModels(true); setIsRefreshingModels(true);
setModelError(null); setModelError(null);
@@ -210,7 +269,6 @@ export function Objective5Chat() {
useEffect(() => { useEffect(() => {
if (!hasLoadedSettings) return; if (!hasLoadedSettings) return;
if (!endpoint.trim()) return; if (!endpoint.trim()) return;
if (!apiKey.trim() && !isLocalEndpoint(endpoint.trim())) return;
void refreshModels(); void refreshModels();
}, [apiKey, endpoint, hasLoadedSettings, refreshModels]); }, [apiKey, endpoint, hasLoadedSettings, refreshModels]);
@@ -227,11 +285,6 @@ export function Objective5Chat() {
return; return;
} }
if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) {
setError("Enter an API key before sending a prompt to a remote endpoint.");
return;
}
if (!activeModel) { if (!activeModel) {
setError("Choose one of the quantized models or enter a custom model name."); setError("Choose one of the quantized models or enter a custom model name.");
return; return;
@@ -305,7 +358,7 @@ export function Objective5Chat() {
<section className="objective5-chat" data-widget-enhanced="true"> <section className="objective5-chat" data-widget-enhanced="true">
<div className="objective5-chat__header"> <div className="objective5-chat__header">
<p className="objective5-chat__eyebrow">Objective 5 Lab Widget</p> <p className="objective5-chat__eyebrow">Objective 5 Lab Widget</p>
<h3>Compare qualitative output with a hosted chat endpoint</h3> <h3>Compare qualitative output with the managed Ollama endpoint</h3>
<p className="objective5-chat__lede"> <p className="objective5-chat__lede">
Switch between quantized models, reuse the same prompt, and ask for Switch between quantized models, reuse the same prompt, and ask for
text or simple SVG sketches like{" "} text or simple SVG sketches like{" "}
@@ -379,9 +432,9 @@ export function Objective5Chat() {
</div> </div>
<p className="objective5-chat__settings-note"> <p className="objective5-chat__settings-note">
Settings stay in your browser for this lab only. Available models are This widget starts on the courseware Ollama service by default. You can
refreshed from the configured endpoint, and changing the model does not still swap the endpoint, add an API key, refresh the available models,
clear the transcript. and change models without clearing the transcript.
</p> </p>
{modelError ? ( {modelError ? (
+68
View File
@@ -1,14 +1,38 @@
export const COURSEWARE_RUNTIME_CONFIG_PATH = "/courseware-runtime.json"; export const COURSEWARE_RUNTIME_CONFIG_PATH = "/courseware-runtime.json";
export const LAB1_DEFAULT_NETRON_URL = "http://127.0.0.1:8338"; export const LAB1_DEFAULT_NETRON_URL = "http://127.0.0.1:8338";
export const LAB2_DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434";
export const LAB2_DEFAULT_OLLAMA_MODELS = [
{
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
{
label: "Gemma 4 E2B Q4",
value: "batiai/gemma4-e2b:q4",
},
{
label: "Gemma 4 E2B Q8",
value: "bjoernb/gemma4-e2b-fast:latest",
},
] as const;
export const LAB3_DEFAULT_TERMINAL_PATH = "/wetty"; export const LAB3_DEFAULT_TERMINAL_PATH = "/wetty";
export type CoursewareRuntimeModelOption = {
label: string;
value: string;
};
export type CoursewareRuntimeConfig = { export type CoursewareRuntimeConfig = {
lab1NetronUrl?: string; lab1NetronUrl?: string;
lab2OllamaModels?: CoursewareRuntimeModelOption[];
lab2OllamaUrl?: string;
lab3TerminalUrl?: string; lab3TerminalUrl?: string;
}; };
export type ResolvedCoursewareRuntimeConfig = { export type ResolvedCoursewareRuntimeConfig = {
lab1NetronUrl: string; lab1NetronUrl: string;
lab2OllamaModels: CoursewareRuntimeModelOption[];
lab2OllamaUrl: string;
lab3TerminalUrl: string; lab3TerminalUrl: string;
}; };
@@ -50,6 +74,48 @@ export function getLab1NetronUrl(
return rewriteLoopbackHost(trimmedValue, currentHostname); return rewriteLoopbackHost(trimmedValue, currentHostname);
} }
export function getLab2OllamaUrl(
envValue?: string,
currentHostname = getCurrentHostname(),
) {
const trimmedValue = envValue?.trim();
if (!trimmedValue) {
return rewriteLoopbackHost(LAB2_DEFAULT_OLLAMA_URL, currentHostname);
}
return rewriteLoopbackHost(trimmedValue, currentHostname);
}
export function getLab2OllamaModels(
envValue?: CoursewareRuntimeModelOption[],
) {
if (!Array.isArray(envValue) || envValue.length === 0) {
return LAB2_DEFAULT_OLLAMA_MODELS.map((model) => ({ ...model }));
}
const normalizedModels = envValue
.map((model) => {
const label = model?.label?.trim();
const value = model?.value?.trim();
if (!label || !value) {
return null;
}
return { label, value } satisfies CoursewareRuntimeModelOption;
})
.filter(
(model): model is CoursewareRuntimeModelOption => model !== null,
);
if (normalizedModels.length === 0) {
return LAB2_DEFAULT_OLLAMA_MODELS.map((model) => ({ ...model }));
}
return normalizedModels;
}
export function getLab3TerminalPath( export function getLab3TerminalPath(
envValue?: string, envValue?: string,
currentHostname = getCurrentHostname(), currentHostname = getCurrentHostname(),
@@ -73,6 +139,8 @@ export function normalizeCoursewareRuntimeConfig(
): ResolvedCoursewareRuntimeConfig { ): ResolvedCoursewareRuntimeConfig {
return { return {
lab1NetronUrl: getLab1NetronUrl(config?.lab1NetronUrl, currentHostname), lab1NetronUrl: getLab1NetronUrl(config?.lab1NetronUrl, currentHostname),
lab2OllamaModels: getLab2OllamaModels(config?.lab2OllamaModels),
lab2OllamaUrl: getLab2OllamaUrl(config?.lab2OllamaUrl, currentHostname),
lab3TerminalUrl: getLab3TerminalPath( lab3TerminalUrl: getLab3TerminalPath(
config?.lab3TerminalUrl, config?.lab3TerminalUrl,
currentHostname, currentHostname,
+37 -6
View File
@@ -4,6 +4,7 @@ import {
extractObjective5Metrics, extractObjective5Metrics,
extractModelOptions, extractModelOptions,
extractSvgMarkup, extractSvgMarkup,
getOllamaChatEndpointCandidates,
getModelListEndpointCandidates, getModelListEndpointCandidates,
isLocalEndpoint, isLocalEndpoint,
normalizeOllamaChatEndpoint, normalizeOllamaChatEndpoint,
@@ -38,21 +39,31 @@ describe("extractSvgMarkup", () => {
}); });
describe("normalizeOllamaChatEndpoint", () => { describe("normalizeOllamaChatEndpoint", () => {
it("appends the ollama chat path to a base api endpoint", () => { it("prefers the native ollama chat path for a bare ollama endpoint", () => {
expect(normalizeOllamaChatEndpoint("https://ai.zuccaro.me/api")).toBe( expect(normalizeOllamaChatEndpoint("http://127.0.0.1:11434")).toBe(
"https://ai.zuccaro.me/ollama/api/chat", "http://127.0.0.1:11434/api/chat",
); );
}); });
}); });
describe("getModelListEndpointCandidates", () => { describe("getModelListEndpointCandidates", () => {
it("prefers v1 models for bare local endpoints", () => { it("tries native ollama tags before openai model listings for bare local endpoints", () => {
expect(getModelListEndpointCandidates("http://127.0.0.1:1234")).toEqual([ expect(getModelListEndpointCandidates("http://127.0.0.1:1234")).toEqual([
"http://127.0.0.1:1234/api/tags",
"http://127.0.0.1:1234/v1/models", "http://127.0.0.1:1234/v1/models",
]); ]);
}); });
}); });
describe("getOllamaChatEndpointCandidates", () => {
it("tries native and proxied ollama chat routes", () => {
expect(getOllamaChatEndpointCandidates("http://127.0.0.1:8080")).toEqual([
"http://127.0.0.1:8080/api/chat",
"http://127.0.0.1:8080/ollama/api/chat",
]);
});
});
describe("isLocalEndpoint", () => { describe("isLocalEndpoint", () => {
it("detects localhost endpoints", () => { it("detects localhost endpoints", () => {
expect(isLocalEndpoint("http://127.0.0.1:1234")).toBe(true); expect(isLocalEndpoint("http://127.0.0.1:1234")).toBe(true);
@@ -115,12 +126,32 @@ describe("extractModelOptions", () => {
extractModelOptions({ extractModelOptions({
data: [ data: [
{ id: "qwen3.5-9b-mlx", object: "model" }, { id: "qwen3.5-9b-mlx", object: "model" },
{ id: "gemma4:e2b-it-q4_K_M", name: "Gemma 4 E2B Q4_K_M" }, { id: "batiai/gemma4-e2b:q4", name: "Gemma 4 E2B Q4" },
], ],
}), }),
).toEqual([ ).toEqual([
{ label: "qwen3.5-9b-mlx", value: "qwen3.5-9b-mlx" }, { label: "qwen3.5-9b-mlx", value: "qwen3.5-9b-mlx" },
{ label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" }, { label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" },
]);
});
it("maps ollama tag payloads into dropdown options", () => {
expect(
extractModelOptions({
models: [
{ model: "cajina/gemma4_e2b-q2_k_xl:v01" },
{ name: "bjoernb/gemma4-e2b-fast:latest" },
],
}),
).toEqual([
{
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
{
label: "Gemma 4 E2B Q8",
value: "bjoernb/gemma4-e2b-fast:latest",
},
]); ]);
}); });
}); });
+92 -27
View File
@@ -3,23 +3,21 @@ import {
XMLSerializer, XMLSerializer,
type Element as XmlDomElement, type Element as XmlDomElement,
} from "@xmldom/xmldom"; } from "@xmldom/xmldom";
import {
LAB2_DEFAULT_OLLAMA_MODELS,
LAB2_DEFAULT_OLLAMA_URL,
} from "~/lib/courseware-runtime";
export const LAB2_CHAT_STORAGE_KEY = "lab2-objective5-chat-settings"; export const LAB2_CHAT_STORAGE_KEY = "lab2-objective5-chat-settings";
export const LAB2_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api"; export const LAB2_DEFAULT_ENDPOINT = LAB2_DEFAULT_OLLAMA_URL;
export const LAB2_LEGACY_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api";
export const LAB2_CUSTOM_MODEL_VALUE = "__custom__"; export const LAB2_CUSTOM_MODEL_VALUE = "__custom__";
export const LAB2_MAX_CONTEXT_MESSAGES = 10; export const LAB2_MAX_CONTEXT_MESSAGES = 10;
export const LAB2_MAX_MESSAGE_LENGTH = 4000; export const LAB2_MAX_MESSAGE_LENGTH = 4000;
export const LAB2_MAX_SVG_LENGTH = 20000; export const LAB2_MAX_SVG_LENGTH = 20000;
export const LAB2_MODEL_OPTIONS = [ export const LAB2_MODEL_OPTIONS = [
{ ...LAB2_DEFAULT_OLLAMA_MODELS,
label: "Gemma 4 E2B Q8_0",
value: "gemma4:e2b-it-q8_0",
},
{
label: "Gemma 4 E2B Q4_K_M",
value: "gemma4:e2b-it-q4_K_M",
},
{ {
label: "Custom model", label: "Custom model",
value: LAB2_CUSTOM_MODEL_VALUE, value: LAB2_CUSTOM_MODEL_VALUE,
@@ -111,6 +109,10 @@ export type SvgSanitizationResult =
| SvgSanitizationFailure | SvgSanitizationFailure
| SvgSanitizationSuccess; | SvgSanitizationSuccess;
const predefinedModelLabels = new Map<string, string>(
LAB2_DEFAULT_OLLAMA_MODELS.map((model) => [model.value, model.label]),
);
const SVG_NAMESPACE = "http://www.w3.org/2000/svg"; const SVG_NAMESPACE = "http://www.w3.org/2000/svg";
const allowedSvgElements = new Set([ const allowedSvgElements = new Set([
"svg", "svg",
@@ -246,24 +248,35 @@ export function getModelListEndpointCandidates(endpoint: string) {
const url = new URL(endpoint); const url = new URL(endpoint);
const trimmedPath = url.pathname.replace(/\/+$/, ""); const trimmedPath = url.pathname.replace(/\/+$/, "");
if (trimmedPath.endsWith("/models")) { if (
trimmedPath.endsWith("/models") ||
trimmedPath.endsWith("/api/tags")
) {
url.hash = ""; url.hash = "";
return [url.toString()]; return [url.toString()];
} }
const paths = new Set<string>(); const paths = new Set<string>();
if (trimmedPath.endsWith("/api")) { if (trimmedPath.endsWith("/ollama/api")) {
paths.add("/ollama/api/tags");
} else if (trimmedPath.endsWith("/ollama")) {
paths.add("/ollama/api/tags");
} else if (trimmedPath.endsWith("/api")) {
paths.add("/api/tags");
paths.add("/api/v1/models"); paths.add("/api/v1/models");
paths.add("/api/models"); paths.add("/api/models");
} else if (trimmedPath.endsWith("/api/v1")) { } else if (trimmedPath.endsWith("/api/v1")) {
paths.add("/api/tags");
paths.add("/api/v1/models"); paths.add("/api/v1/models");
paths.add("/api/models"); paths.add("/api/models");
} else if (trimmedPath.endsWith("/v1")) { } else if (trimmedPath.endsWith("/v1")) {
paths.add("/v1/models"); paths.add("/v1/models");
} else if (trimmedPath.length === 0) { } else if (trimmedPath.length === 0) {
paths.add("/api/tags");
paths.add("/v1/models"); paths.add("/v1/models");
} else { } else {
paths.add(`${trimmedPath}/api/tags`);
paths.add(`${trimmedPath}/v1/models`); paths.add(`${trimmedPath}/v1/models`);
paths.add(`${trimmedPath}/models`); paths.add(`${trimmedPath}/models`);
} }
@@ -277,21 +290,48 @@ export function getModelListEndpointCandidates(endpoint: string) {
} }
export function normalizeOllamaChatEndpoint(endpoint: string) { export function normalizeOllamaChatEndpoint(endpoint: string) {
return getOllamaChatEndpointCandidates(endpoint)[0];
}
export function getOllamaChatEndpointCandidates(endpoint: string) {
const url = new URL(endpoint); const url = new URL(endpoint);
const trimmedPath = url.pathname.replace(/\/+$/, ""); const trimmedPath = url.pathname.replace(/\/+$/, "");
if (trimmedPath.endsWith("/ollama/api/chat")) { if (
trimmedPath.endsWith("/api/chat") ||
trimmedPath.endsWith("/ollama/api/chat")
) {
url.pathname = trimmedPath; url.pathname = trimmedPath;
} else if (trimmedPath.endsWith("/api") || trimmedPath.endsWith("/api/v1")) { url.hash = "";
url.pathname = "/ollama/api/chat"; return [url.toString()];
} else if (trimmedPath.length === 0) {
url.pathname = "/ollama/api/chat";
} else {
url.pathname = `${trimmedPath}/ollama/api/chat`;
} }
url.hash = ""; const paths = new Set<string>();
return url.toString();
if (trimmedPath.endsWith("/ollama/api")) {
paths.add("/ollama/api/chat");
} else if (trimmedPath.endsWith("/ollama")) {
paths.add("/ollama/api/chat");
} else if (trimmedPath.endsWith("/api")) {
paths.add("/api/chat");
paths.add("/ollama/api/chat");
} else if (trimmedPath.endsWith("/api/v1") || trimmedPath.endsWith("/v1")) {
paths.add("/ollama/api/chat");
paths.add("/api/chat");
} else if (trimmedPath.length === 0) {
paths.add("/api/chat");
paths.add("/ollama/api/chat");
} else {
paths.add(`${trimmedPath}/api/chat`);
paths.add(`${trimmedPath}/ollama/api/chat`);
}
return Array.from(paths).map((path) => {
const candidate = new URL(url.toString());
candidate.pathname = path;
candidate.hash = "";
return candidate.toString();
});
} }
export function looksLikeOllamaModel(model: string) { export function looksLikeOllamaModel(model: string) {
@@ -373,15 +413,11 @@ export function extractAssistantTextContent(payload: ChatCompletionPayload) {
} }
export function extractModelOptions(payload: unknown): Objective5ModelOption[] { export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
if ( if (!payload || typeof payload !== "object") {
!payload ||
typeof payload !== "object" ||
!("data" in payload) ||
!Array.isArray(payload.data)
) {
return []; return [];
} }
if ("data" in payload && Array.isArray(payload.data)) {
return payload.data return payload.data
.map((item) => { .map((item) => {
if (!item || typeof item !== "object") return null; if (!item || typeof item !== "object") return null;
@@ -391,7 +427,7 @@ export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
const label = const label =
"name" in item && typeof item.name === "string" && item.name.trim() "name" in item && typeof item.name === "string" && item.name.trim()
? item.name.trim() ? item.name.trim()
: value; : getModelLabel(value);
if (!value) return null; if (!value) return null;
return { label, value } satisfies Objective5ModelOption; return { label, value } satisfies Objective5ModelOption;
@@ -399,6 +435,31 @@ export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
.filter((item): item is Objective5ModelOption => item !== null); .filter((item): item is Objective5ModelOption => item !== null);
} }
if (!("models" in payload) || !Array.isArray(payload.models)) {
return [];
}
return payload.models
.map((item) => {
if (!item || typeof item !== "object") return null;
const value =
("model" in item && typeof item.model === "string" && item.model.trim()
? item.model
: "name" in item && typeof item.name === "string"
? item.name
: ""
).trim();
if (!value) return null;
return {
label: getModelLabel(value),
value,
} satisfies Objective5ModelOption;
})
.filter((item): item is Objective5ModelOption => item !== null);
}
export function extractObjective5Metrics( export function extractObjective5Metrics(
payload: ChatCompletionPayload, payload: ChatCompletionPayload,
): Objective5Metrics | null { ): Objective5Metrics | null {
@@ -600,6 +661,10 @@ export function getDefaultObjective5ModelOptions(): Objective5ModelOption[] {
return [...LAB2_MODEL_OPTIONS]; return [...LAB2_MODEL_OPTIONS];
} }
/**
 * Resolves a model tag to its curated display label, falling back to the raw
 * tag when the model is not in the predefined list.
 */
function getModelLabel(value: string) {
  const label = predefinedModelLabels.get(value);
  return label === undefined ? value : label;
}
function validateSvgNode(node: XmlDomElement): string | null { function validateSvgNode(node: XmlDomElement): string | null {
if (!allowedSvgElements.has(node.tagName)) { if (!allowedSvgElements.has(node.tagName)) {
return `The SVG used a blocked element: <${node.tagName}>.`; return `The SVG used a blocked element: <${node.tagName}>.`;