This commit is contained in:
2026-04-23 14:48:07 -06:00
parent f74575277a
commit 431e667c5e
9 changed files with 505 additions and 228 deletions
+15 -18
View File
@@ -1,7 +1,7 @@
---
order: 2
title: "Lab 2 - Quantization Tradeoffs: Comparing 2-bit, 4-bit, and 8-bit"
description: Download Gemma 4 E2B in three GGUF quantizations and compare size, metadata, and output quality.
description: Compare Gemma 4 E2B in three Ollama quantizations and study how lower precision changes behavior.
---
<!-- breakout-style: instruction-rails -->
@@ -10,8 +10,8 @@ description: Download Gemma 4 E2B in three GGUF quantizations and compare size,
In this lab, we will:
- Download the same Gemma model in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0`
- Compare file size and GGUF metadata across those quantizations
- Pull the same Gemma model in Q2, Q4, and Q8 Ollama variants
- Compare the quantization labels and model behavior across those variants
- Observe how lower precision changes the model's behavior
- Build intuition for when a smaller quant may or may not be worth it
@@ -23,19 +23,15 @@ In this lab, we will:
## Objective 1: Understand the Model and the Quantizations
For this lab, we will use the Hugging Face repository for **Unsloth's GGUF release of Gemma 4 E2B Instruct**:
For this lab, we will use three Ollama-published variants of **Gemma 4 E2B** that represent distinct precision bands:
<https://huggingface.co/unsloth/gemma-4-E2B-it-GGUF>
| Precision band | Ollama model tag | Why we are using it |
| -------------- | ----------------------------------- | --------------------------------------- |
| Q2 | `cajina/gemma4_e2b-q2_k_xl:v01` | Most aggressive compression in this lab |
| Q4 | `batiai/gemma4-e2b:q4` | Common middle-ground quant |
| Q8 | `bjoernb/gemma4-e2b-fast:latest` | Highest-quality quant in this lab |
This repository currently exposes multiple GGUF variants of the same base model. We will focus on one file from each of these precision bands:
| Precision band | GGUF file | Why we are using it | File Size |
| -------------- | ------------------------------ | --------------------------------------- |-----------|
| 2-bit | `gemma-4-E2B-it-UD-IQ2_M.gguf` | Most aggressive compression in this lab | 2.4 GB |
| 4-bit | `gemma-4-E2B-it-Q4_K_M.gguf` | Common middle-ground quant | 3.17 GB |
| 8-bit | `gemma-4-E2B-it-Q8_0.gguf` | Highest-quality quant in this lab | 5.05 GB |
Even though the filenames differ, these are all the same underlying instruction-tuned Gemma 4 E2B model. The main variable we are changing is how the weights are stored.
Even though the Ollama tags differ, these are all variants of the same underlying Gemma 4 E2B model family. The main variable we are changing is how the weights are stored.
When we say these variants are the same model, we mean that the overall neural network is still the same:
@@ -97,10 +93,11 @@ The viewer below zooms out from one weight and instead shows a toy layer with 16
### Explore: Compare the same prompts through the hosted chat widget
If your instructor provides an OpenAI-compatible endpoint, you can compare the same prompts through the embedded chat tool below:
By default, the widget below points to the courseware-managed Ollama service and the three Lab 2 model tags above. You can still switch to another endpoint if your instructor provides one.
- Paste the lab endpoint and API key into the settings row
- Switch between `Q8_0`, `Q4_K_M`, and `UD-IQ2_M`
- Use the preloaded managed endpoint or replace it with another compatible endpoint
- Optionally add an API key if your chosen endpoint requires one
- Switch between the configured Q2, Q4, and Q8 Gemma variants
- Re-run the same prompt so you can compare coherence, stability, and SVG output
- Try a visual prompt such as `Draw a pelican riding a bicycle.`
@@ -121,4 +118,4 @@ The important takeaway is not that one quant is always "best." The important tak
## Conclusion
This lab isolates quantization as the main variable. By downloading **Gemma 4 E2B Instruct** in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0`, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against disk usage and resource constraints.
This lab isolates quantization as the main variable. By comparing **Gemma 4 E2B** in Q2, Q4, and Q8 Ollama variants, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against efficiency and resource constraints.
+40 -36
View File
@@ -4,9 +4,9 @@ import {
extractAssistantTextContent,
extractObjective5Metrics,
extractSvgMarkup,
getOllamaChatEndpointCandidates,
isLocalEndpoint,
looksLikeOllamaModel,
normalizeOllamaChatEndpoint,
normalizeUpstreamChatEndpoint,
sanitizeSvgDocument,
type Objective5Message,
@@ -50,15 +50,6 @@ export async function POST(request: Request) {
);
}
if (!apiKey && !isLocalEndpoint(endpoint)) {
return NextResponse.json(
{
error: "An API key is required for remote endpoints.",
},
{ status: 400 },
);
}
if (!model) {
return NextResponse.json(
{
@@ -79,11 +70,11 @@ export async function POST(request: Request) {
const useOllamaChat = looksLikeOllamaModel(model);
const useLocalOpenAI = !useOllamaChat && isLocalEndpoint(endpoint);
let upstreamUrl: string;
let upstreamCandidates: string[];
try {
upstreamUrl = useOllamaChat
? normalizeOllamaChatEndpoint(endpoint)
: normalizeUpstreamChatEndpoint(endpoint);
upstreamCandidates = useOllamaChat
? getOllamaChatEndpointCandidates(endpoint)
: [normalizeUpstreamChatEndpoint(endpoint)];
} catch {
return NextResponse.json(
{
@@ -98,6 +89,12 @@ export async function POST(request: Request) {
: useLocalOpenAI
? LOCAL_OPENAI_UPSTREAM_TIMEOUT_MS
: OPENAI_UPSTREAM_TIMEOUT_MS;
try {
let lastStatus = 502;
let lastMessage = "The chat request could not reach the upstream endpoint.";
for (const upstreamUrl of upstreamCandidates) {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), upstreamTimeoutMs);
@@ -139,7 +136,8 @@ export async function POST(request: Request) {
}
if (!upstreamResponse.ok) {
const message =
lastStatus = upstreamResponse.status;
lastMessage =
typeof parsedBody === "object" &&
parsedBody !== null &&
"error" in parsedBody &&
@@ -149,33 +147,21 @@ export async function POST(request: Request) {
typeof parsedBody.error.message === "string"
? parsedBody.error.message
: `The upstream endpoint returned ${upstreamResponse.status}.`;
return NextResponse.json(
{
error: message,
},
{ status: upstreamResponse.status },
);
continue;
}
if (!parsedBody || typeof parsedBody !== "object") {
return NextResponse.json(
{
error: "The upstream endpoint returned an unreadable response.",
},
{ status: 502 },
);
lastStatus = 502;
lastMessage = "The upstream endpoint returned an unreadable response.";
continue;
}
const content = extractAssistantTextContent(parsedBody);
const metrics = extractObjective5Metrics(parsedBody);
if (!content) {
return NextResponse.json(
{
error: "The upstream endpoint returned no assistant content.",
},
{ status: 502 },
);
lastStatus = 502;
lastMessage = "The upstream endpoint returned no assistant content.";
continue;
}
const svgMarkup = extractSvgMarkup(content);
@@ -206,6 +192,26 @@ export async function POST(request: Request) {
role: "assistant",
svg: sanitizedSvg.svg,
});
} catch (caughtError) {
if (caughtError instanceof Error && caughtError.name === "AbortError") {
lastStatus = 504;
lastMessage = `The upstream endpoint timed out after ${Math.floor(upstreamTimeoutMs / 1000)} seconds.`;
continue;
}
lastStatus = 502;
lastMessage = "The chat request could not reach the upstream endpoint.";
} finally {
clearTimeout(timeoutId);
}
}
return NextResponse.json(
{
error: lastMessage,
},
{ status: lastStatus },
);
} catch (caughtError) {
if (caughtError instanceof Error && caughtError.name === "AbortError") {
return NextResponse.json(
@@ -222,7 +228,5 @@ export async function POST(request: Request) {
},
{ status: 502 },
);
} finally {
clearTimeout(timeoutId);
}
}
-10
View File
@@ -3,7 +3,6 @@ import {
extractModelOptions,
getDefaultObjective5ModelOptions,
getModelListEndpointCandidates,
isLocalEndpoint,
} from "~/lib/lab2-chat";
type ModelsRouteRequestBody = {
@@ -39,15 +38,6 @@ export async function POST(request: Request) {
);
}
if (!apiKey && !isLocalEndpoint(endpoint)) {
return NextResponse.json(
{
error: "An API key is required for remote endpoints.",
},
{ status: 400 },
);
}
let candidates: string[];
try {
candidates = getModelListEndpointCandidates(endpoint);
+7
View File
@@ -13,6 +13,13 @@ describe("LabContent", () => {
new Response(
JSON.stringify({
lab1NetronUrl: "http://127.0.0.1:8338",
lab2OllamaModels: [
{
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
],
lab2OllamaUrl: "http://127.0.0.1:11434",
lab3TerminalUrl: "http://127.0.0.1:7681/wetty",
}),
{ status: 200 },
+71 -9
View File
@@ -16,11 +16,45 @@ describe("Objective5Chat", () => {
const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
const url = String(input);
if (url === "/courseware-runtime.json") {
return new Response(
JSON.stringify({
lab2OllamaModels: [
{
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
{
label: "Gemma 4 E2B Q4",
value: "batiai/gemma4-e2b:q4",
},
{
label: "Gemma 4 E2B Q8",
value: "bjoernb/gemma4-e2b-fast:latest",
},
],
lab2OllamaUrl: "http://127.0.0.1:11434",
}),
{ status: 200 },
);
}
if (url === "/api/lab2/models") {
return {
json: async () => ({
models: [
{ label: "LM Studio Qwen", value: "qwen3.5-9b-mlx" },
{
label: "Gemma 4 E2B Q2",
value: "cajina/gemma4_e2b-q2_k_xl:v01",
},
{
label: "Gemma 4 E2B Q4",
value: "batiai/gemma4-e2b:q4",
},
{
label: "Gemma 4 E2B Q8",
value: "bjoernb/gemma4-e2b-fast:latest",
},
{ label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE },
],
}),
@@ -74,8 +108,12 @@ describe("Objective5Chat", () => {
});
it("persists settings updates back to localStorage", async () => {
mockFetch();
render(<Objective5Chat />);
await screen.findByLabelText("Endpoint");
fireEvent.change(screen.getByLabelText("Endpoint"), {
target: { value: "https://saved.example/api" },
});
@@ -95,7 +133,9 @@ describe("Objective5Chat", () => {
target: { value: "http://127.0.0.1:1234" },
});
expect(await screen.findByRole("option", { name: "LM Studio Qwen" })).toBeInTheDocument();
expect(
await screen.findByRole("option", { name: "Gemma 4 E2B Q4" }),
).toBeInTheDocument();
expect(fetchMock).toHaveBeenCalledWith(
"/api/lab2/models",
expect.objectContaining({
@@ -109,9 +149,6 @@ describe("Objective5Chat", () => {
render(<Objective5Chat />);
fireEvent.change(screen.getByLabelText("API key"), {
target: { value: "sk-test" },
});
fireEvent.change(screen.getByLabelText("Prompt"), {
target: { value: "Compare these quantized models." },
});
@@ -129,11 +166,26 @@ describe("Objective5Chat", () => {
vi.fn(async (input: RequestInfo | URL) => {
const url = String(input);
if (url === "/courseware-runtime.json") {
return new Response(
JSON.stringify({
lab2OllamaModels: [
{
label: "Gemma 4 E2B Q4",
value: "batiai/gemma4-e2b:q4",
},
],
lab2OllamaUrl: "http://127.0.0.1:11434",
}),
{ status: 200 },
);
}
if (url === "/api/lab2/models") {
return {
json: async () => ({
models: [
{ label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" },
{ label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" },
{ label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE },
],
}),
@@ -160,9 +212,6 @@ describe("Objective5Chat", () => {
render(<Objective5Chat />);
fireEvent.change(screen.getByLabelText("API key"), {
target: { value: "sk-test" },
});
fireEvent.change(screen.getByLabelText("Prompt"), {
target: { value: "Draw a pelican riding a bicycle." },
});
@@ -174,4 +223,17 @@ describe("Objective5Chat", () => {
expect(screen.getByText("View SVG source")).toBeInTheDocument();
expect(screen.getByText("Tokens/sec 14.4")).toBeInTheDocument();
});
it("defaults to the managed Ollama runtime endpoint and models", async () => {
mockFetch();
render(<Objective5Chat />);
expect(await screen.findByLabelText("Endpoint")).toHaveValue(
"http://localhost:11434/",
);
expect(screen.getByLabelText("Model")).toHaveValue(
"cajina/gemma4_e2b-q2_k_xl:v01",
);
});
});
+73 -20
View File
@@ -1,14 +1,18 @@
"use client";
import { FormEvent, useCallback, useEffect, useMemo, useState } from "react";
import {
fetchCoursewareRuntimeConfig,
normalizeCoursewareRuntimeConfig,
} from "~/lib/courseware-runtime";
import {
getActiveModel,
getDefaultObjective5ModelOptions,
getDefaultObjective5Settings,
isLocalEndpoint,
LAB2_CHAT_STORAGE_KEY,
LAB2_CUSTOM_MODEL_VALUE,
LAB2_DEFAULT_ENDPOINT,
LAB2_LEGACY_DEFAULT_ENDPOINT,
type Objective5ModelOption,
type Objective5Metrics,
type Objective5Message,
@@ -93,8 +97,21 @@ function toApiConversation(messages: ChatTurn[]) {
}
export function Objective5Chat() {
const defaults = useMemo(() => getDefaultObjective5Settings(), []);
const defaultModelOptions = useMemo(() => getDefaultObjective5ModelOptions(), []);
const runtimeDefaults = useMemo(() => normalizeCoursewareRuntimeConfig(), []);
const defaults = useMemo(
() => ({
...getDefaultObjective5Settings(),
endpoint: runtimeDefaults.lab2OllamaUrl,
selectedModel:
runtimeDefaults.lab2OllamaModels[0]?.value ??
getDefaultObjective5Settings().selectedModel,
}),
[runtimeDefaults.lab2OllamaModels, runtimeDefaults.lab2OllamaUrl],
);
const defaultModelOptions = useMemo(
() => ensureCustomOption(runtimeDefaults.lab2OllamaModels),
[runtimeDefaults.lab2OllamaModels],
);
const [endpoint, setEndpoint] = useState(defaults.endpoint);
const [apiKey, setApiKey] = useState(defaults.apiKey);
const [selectedModel, setSelectedModel] = useState(defaults.selectedModel);
@@ -112,23 +129,70 @@ export function Objective5Chat() {
const activeModel = getActiveModel(selectedModel, customModel);
useEffect(() => {
let isCancelled = false;
async function loadInitialSettings() {
const resolvedRuntime = await fetchCoursewareRuntimeConfig().catch(() =>
normalizeCoursewareRuntimeConfig(),
);
if (isCancelled) {
return;
}
const resolvedModelOptions = ensureCustomOption(
resolvedRuntime.lab2OllamaModels,
);
const resolvedSelectedModel =
resolvedRuntime.lab2OllamaModels[0]?.value ?? defaults.selectedModel;
setModelOptions(resolvedModelOptions);
try {
const savedSettings = window.localStorage.getItem(LAB2_CHAT_STORAGE_KEY);
if (!savedSettings) {
setEndpoint(resolvedRuntime.lab2OllamaUrl);
setSelectedModel(resolvedSelectedModel);
setCustomModel("");
setApiKey("");
setHasLoadedSettings(true);
return;
}
const parsed = JSON.parse(savedSettings) as Partial<typeof defaults>;
setEndpoint(parsed.endpoint?.trim() || LAB2_DEFAULT_ENDPOINT);
const savedEndpoint = parsed.endpoint?.trim();
const nextEndpoint =
!savedEndpoint || savedEndpoint === LAB2_LEGACY_DEFAULT_ENDPOINT
? resolvedRuntime.lab2OllamaUrl
: savedEndpoint;
const savedSelectedModel = parsed.selectedModel?.trim();
const nextSelectedModel =
savedSelectedModel === LAB2_CUSTOM_MODEL_VALUE ||
resolvedModelOptions.some(
(option) => option.value === savedSelectedModel,
)
? savedSelectedModel || resolvedSelectedModel
: resolvedSelectedModel;
setEndpoint(nextEndpoint);
setApiKey(parsed.apiKey ?? "");
setSelectedModel(parsed.selectedModel?.trim() || defaults.selectedModel);
setSelectedModel(nextSelectedModel);
setCustomModel(parsed.customModel?.trim() || "");
} catch {
window.localStorage.removeItem(LAB2_CHAT_STORAGE_KEY);
setEndpoint(resolvedRuntime.lab2OllamaUrl);
setSelectedModel(resolvedSelectedModel);
setCustomModel("");
setApiKey("");
} finally {
setHasLoadedSettings(true);
}
}
void loadInitialSettings();
return () => {
isCancelled = true;
};
}, [defaults.selectedModel]);
useEffect(() => {
@@ -154,11 +218,6 @@ export function Objective5Chat() {
return;
}
if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) {
setModelError("Enter an API key before refreshing remote models.");
return;
}
setIsRefreshingModels(true);
setModelError(null);
@@ -210,7 +269,6 @@ export function Objective5Chat() {
useEffect(() => {
if (!hasLoadedSettings) return;
if (!endpoint.trim()) return;
if (!apiKey.trim() && !isLocalEndpoint(endpoint.trim())) return;
void refreshModels();
}, [apiKey, endpoint, hasLoadedSettings, refreshModels]);
@@ -227,11 +285,6 @@ export function Objective5Chat() {
return;
}
if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) {
setError("Enter an API key before sending a prompt to a remote endpoint.");
return;
}
if (!activeModel) {
setError("Choose one of the quantized models or enter a custom model name.");
return;
@@ -305,7 +358,7 @@ export function Objective5Chat() {
<section className="objective5-chat" data-widget-enhanced="true">
<div className="objective5-chat__header">
<p className="objective5-chat__eyebrow">Objective 5 Lab Widget</p>
<h3>Compare qualitative output with a hosted chat endpoint</h3>
<h3>Compare qualitative output with the managed Ollama endpoint</h3>
<p className="objective5-chat__lede">
Switch between quantized models, reuse the same prompt, and ask for
text or simple SVG sketches like{" "}
@@ -379,9 +432,9 @@ export function Objective5Chat() {
</div>
<p className="objective5-chat__settings-note">
Settings stay in your browser for this lab only. Available models are
refreshed from the configured endpoint, and changing the model does not
clear the transcript.
This widget starts on the courseware Ollama service by default. You can
still swap the endpoint, add an API key, refresh the available models,
and change models without clearing the transcript.
</p>
{modelError ? (
+68
View File
@@ -1,14 +1,38 @@
/** Path (relative to the site origin) where the runtime config JSON is served. */
export const COURSEWARE_RUNTIME_CONFIG_PATH = "/courseware-runtime.json";

/** Fallback Netron viewer URL for Lab 1 when no runtime override is provided. */
export const LAB1_DEFAULT_NETRON_URL = "http://127.0.0.1:8338";

/** Fallback Ollama base URL for Lab 2 when no runtime override is provided. */
export const LAB2_DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434";

// Default Lab 2 model dropdown entries: one curated tag per precision band
// (Q2 / Q4 / Q8), used when the runtime config supplies no model list.
export const LAB2_DEFAULT_OLLAMA_MODELS = [
  {
    label: "Gemma 4 E2B Q2",
    value: "cajina/gemma4_e2b-q2_k_xl:v01",
  },
  {
    label: "Gemma 4 E2B Q4",
    value: "batiai/gemma4-e2b:q4",
  },
  {
    label: "Gemma 4 E2B Q8",
    value: "bjoernb/gemma4-e2b-fast:latest",
  },
] as const;

/** Fallback terminal path for Lab 3 (joined to the current origin elsewhere). */
export const LAB3_DEFAULT_TERMINAL_PATH = "/wetty";

/** One selectable model entry: display label plus the Ollama model tag. */
export type CoursewareRuntimeModelOption = {
  label: string;
  value: string;
};

/**
 * Raw runtime config as fetched from COURSEWARE_RUNTIME_CONFIG_PATH.
 * Every field is optional; defaults are applied during normalization.
 */
export type CoursewareRuntimeConfig = {
  lab1NetronUrl?: string;
  lab2OllamaModels?: CoursewareRuntimeModelOption[];
  lab2OllamaUrl?: string;
  lab3TerminalUrl?: string;
};

/** Runtime config after normalization — every field resolved to a value. */
export type ResolvedCoursewareRuntimeConfig = {
  lab1NetronUrl: string;
  lab2OllamaModels: CoursewareRuntimeModelOption[];
  lab2OllamaUrl: string;
  lab3TerminalUrl: string;
};
@@ -50,6 +74,48 @@ export function getLab1NetronUrl(
return rewriteLoopbackHost(trimmedValue, currentHostname);
}
/**
 * Resolve the Lab 2 Ollama base URL.
 *
 * An empty or whitespace-only override falls back to the default URL;
 * either way the loopback host is rewritten for the current hostname.
 */
export function getLab2OllamaUrl(
  envValue?: string,
  currentHostname = getCurrentHostname(),
) {
  // "" and undefined are both falsy, so a blank override uses the default.
  const baseUrl = envValue?.trim() || LAB2_DEFAULT_OLLAMA_URL;
  return rewriteLoopbackHost(baseUrl, currentHostname);
}
/**
 * Resolve the Lab 2 model dropdown entries.
 *
 * Entries with a blank label or value are dropped; if nothing usable
 * remains (or no array was provided), fresh copies of the curated
 * defaults are returned instead.
 */
export function getLab2OllamaModels(
  envValue?: CoursewareRuntimeModelOption[],
) {
  // Copy the defaults so callers can never mutate the shared constant.
  const defaultCopies = () =>
    LAB2_DEFAULT_OLLAMA_MODELS.map((model) => ({ ...model }));

  if (!Array.isArray(envValue) || envValue.length === 0) {
    return defaultCopies();
  }

  const normalizedModels: CoursewareRuntimeModelOption[] = [];
  for (const entry of envValue) {
    const label = entry?.label?.trim();
    const value = entry?.value?.trim();
    if (label && value) {
      normalizedModels.push({ label, value });
    }
  }

  return normalizedModels.length > 0 ? normalizedModels : defaultCopies();
}
export function getLab3TerminalPath(
envValue?: string,
currentHostname = getCurrentHostname(),
@@ -73,6 +139,8 @@ export function normalizeCoursewareRuntimeConfig(
): ResolvedCoursewareRuntimeConfig {
return {
lab1NetronUrl: getLab1NetronUrl(config?.lab1NetronUrl, currentHostname),
lab2OllamaModels: getLab2OllamaModels(config?.lab2OllamaModels),
lab2OllamaUrl: getLab2OllamaUrl(config?.lab2OllamaUrl, currentHostname),
lab3TerminalUrl: getLab3TerminalPath(
config?.lab3TerminalUrl,
currentHostname,
+37 -6
View File
@@ -4,6 +4,7 @@ import {
extractObjective5Metrics,
extractModelOptions,
extractSvgMarkup,
getOllamaChatEndpointCandidates,
getModelListEndpointCandidates,
isLocalEndpoint,
normalizeOllamaChatEndpoint,
@@ -38,21 +39,31 @@ describe("extractSvgMarkup", () => {
});
describe("normalizeOllamaChatEndpoint", () => {
it("appends the ollama chat path to a base api endpoint", () => {
expect(normalizeOllamaChatEndpoint("https://ai.zuccaro.me/api")).toBe(
"https://ai.zuccaro.me/ollama/api/chat",
it("prefers the native ollama chat path for a bare ollama endpoint", () => {
expect(normalizeOllamaChatEndpoint("http://127.0.0.1:11434")).toBe(
"http://127.0.0.1:11434/api/chat",
);
});
});
describe("getModelListEndpointCandidates", () => {
it("prefers v1 models for bare local endpoints", () => {
it("tries native ollama tags before openai model listings for bare local endpoints", () => {
expect(getModelListEndpointCandidates("http://127.0.0.1:1234")).toEqual([
"http://127.0.0.1:1234/api/tags",
"http://127.0.0.1:1234/v1/models",
]);
});
});
describe("getOllamaChatEndpointCandidates", () => {
  // A bare host:port endpoint should yield the native Ollama chat route
  // first, then the proxied /ollama-prefixed route as a fallback.
  it("tries native and proxied ollama chat routes", () => {
    expect(getOllamaChatEndpointCandidates("http://127.0.0.1:8080")).toEqual([
      "http://127.0.0.1:8080/api/chat",
      "http://127.0.0.1:8080/ollama/api/chat",
    ]);
  });
});
describe("isLocalEndpoint", () => {
it("detects localhost endpoints", () => {
expect(isLocalEndpoint("http://127.0.0.1:1234")).toBe(true);
@@ -115,12 +126,32 @@ describe("extractModelOptions", () => {
extractModelOptions({
data: [
{ id: "qwen3.5-9b-mlx", object: "model" },
{ id: "gemma4:e2b-it-q4_K_M", name: "Gemma 4 E2B Q4_K_M" },
{ id: "batiai/gemma4-e2b:q4", name: "Gemma 4 E2B Q4" },
],
}),
).toEqual([
{ label: "qwen3.5-9b-mlx", value: "qwen3.5-9b-mlx" },
{ label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" },
{ label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" },
]);
});
// Native Ollama tag payloads carry a `models` array whose items expose the
// tag under `model` or `name` (both accepted here); known tags are mapped to
// their curated display labels rather than echoed verbatim.
it("maps ollama tag payloads into dropdown options", () => {
  expect(
    extractModelOptions({
      models: [
        { model: "cajina/gemma4_e2b-q2_k_xl:v01" },
        { name: "bjoernb/gemma4-e2b-fast:latest" },
      ],
    }),
  ).toEqual([
    {
      label: "Gemma 4 E2B Q2",
      value: "cajina/gemma4_e2b-q2_k_xl:v01",
    },
    {
      label: "Gemma 4 E2B Q8",
      value: "bjoernb/gemma4-e2b-fast:latest",
    },
  ]);
});
});
+92 -27
View File
@@ -3,23 +3,21 @@ import {
XMLSerializer,
type Element as XmlDomElement,
} from "@xmldom/xmldom";
import {
LAB2_DEFAULT_OLLAMA_MODELS,
LAB2_DEFAULT_OLLAMA_URL,
} from "~/lib/courseware-runtime";
export const LAB2_CHAT_STORAGE_KEY = "lab2-objective5-chat-settings";
export const LAB2_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api";
export const LAB2_DEFAULT_ENDPOINT = LAB2_DEFAULT_OLLAMA_URL;
export const LAB2_LEGACY_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api";
export const LAB2_CUSTOM_MODEL_VALUE = "__custom__";
export const LAB2_MAX_CONTEXT_MESSAGES = 10;
export const LAB2_MAX_MESSAGE_LENGTH = 4000;
export const LAB2_MAX_SVG_LENGTH = 20000;
export const LAB2_MODEL_OPTIONS = [
{
label: "Gemma 4 E2B Q8_0",
value: "gemma4:e2b-it-q8_0",
},
{
label: "Gemma 4 E2B Q4_K_M",
value: "gemma4:e2b-it-q4_K_M",
},
...LAB2_DEFAULT_OLLAMA_MODELS,
{
label: "Custom model",
value: LAB2_CUSTOM_MODEL_VALUE,
@@ -111,6 +109,10 @@ export type SvgSanitizationResult =
| SvgSanitizationFailure
| SvgSanitizationSuccess;
// Lookup from Ollama model tag -> human-readable label, seeded from the
// curated Lab 2 defaults; used to label models returned by tag listings.
const predefinedModelLabels = new Map<string, string>(
  LAB2_DEFAULT_OLLAMA_MODELS.map((model) => [model.value, model.label]),
);
const SVG_NAMESPACE = "http://www.w3.org/2000/svg";
const allowedSvgElements = new Set([
"svg",
@@ -246,24 +248,35 @@ export function getModelListEndpointCandidates(endpoint: string) {
const url = new URL(endpoint);
const trimmedPath = url.pathname.replace(/\/+$/, "");
if (trimmedPath.endsWith("/models")) {
if (
trimmedPath.endsWith("/models") ||
trimmedPath.endsWith("/api/tags")
) {
url.hash = "";
return [url.toString()];
}
const paths = new Set<string>();
if (trimmedPath.endsWith("/api")) {
if (trimmedPath.endsWith("/ollama/api")) {
paths.add("/ollama/api/tags");
} else if (trimmedPath.endsWith("/ollama")) {
paths.add("/ollama/api/tags");
} else if (trimmedPath.endsWith("/api")) {
paths.add("/api/tags");
paths.add("/api/v1/models");
paths.add("/api/models");
} else if (trimmedPath.endsWith("/api/v1")) {
paths.add("/api/tags");
paths.add("/api/v1/models");
paths.add("/api/models");
} else if (trimmedPath.endsWith("/v1")) {
paths.add("/v1/models");
} else if (trimmedPath.length === 0) {
paths.add("/api/tags");
paths.add("/v1/models");
} else {
paths.add(`${trimmedPath}/api/tags`);
paths.add(`${trimmedPath}/v1/models`);
paths.add(`${trimmedPath}/models`);
}
@@ -277,21 +290,48 @@ export function getModelListEndpointCandidates(endpoint: string) {
}
/**
 * Normalize an endpoint to a single Ollama chat URL.
 *
 * The candidate list is ordered by preference, so the first entry is
 * the canonical chat route for the given endpoint.
 */
export function normalizeOllamaChatEndpoint(endpoint: string) {
  const [preferredCandidate] = getOllamaChatEndpointCandidates(endpoint);
  return preferredCandidate;
}
export function getOllamaChatEndpointCandidates(endpoint: string) {
const url = new URL(endpoint);
const trimmedPath = url.pathname.replace(/\/+$/, "");
if (trimmedPath.endsWith("/ollama/api/chat")) {
if (
trimmedPath.endsWith("/api/chat") ||
trimmedPath.endsWith("/ollama/api/chat")
) {
url.pathname = trimmedPath;
} else if (trimmedPath.endsWith("/api") || trimmedPath.endsWith("/api/v1")) {
url.pathname = "/ollama/api/chat";
} else if (trimmedPath.length === 0) {
url.pathname = "/ollama/api/chat";
} else {
url.pathname = `${trimmedPath}/ollama/api/chat`;
url.hash = "";
return [url.toString()];
}
url.hash = "";
return url.toString();
const paths = new Set<string>();
if (trimmedPath.endsWith("/ollama/api")) {
paths.add("/ollama/api/chat");
} else if (trimmedPath.endsWith("/ollama")) {
paths.add("/ollama/api/chat");
} else if (trimmedPath.endsWith("/api")) {
paths.add("/api/chat");
paths.add("/ollama/api/chat");
} else if (trimmedPath.endsWith("/api/v1") || trimmedPath.endsWith("/v1")) {
paths.add("/ollama/api/chat");
paths.add("/api/chat");
} else if (trimmedPath.length === 0) {
paths.add("/api/chat");
paths.add("/ollama/api/chat");
} else {
paths.add(`${trimmedPath}/api/chat`);
paths.add(`${trimmedPath}/ollama/api/chat`);
}
return Array.from(paths).map((path) => {
const candidate = new URL(url.toString());
candidate.pathname = path;
candidate.hash = "";
return candidate.toString();
});
}
export function looksLikeOllamaModel(model: string) {
@@ -373,15 +413,11 @@ export function extractAssistantTextContent(payload: ChatCompletionPayload) {
}
export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
if (
!payload ||
typeof payload !== "object" ||
!("data" in payload) ||
!Array.isArray(payload.data)
) {
if (!payload || typeof payload !== "object") {
return [];
}
if ("data" in payload && Array.isArray(payload.data)) {
return payload.data
.map((item) => {
if (!item || typeof item !== "object") return null;
@@ -391,12 +427,37 @@ export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
const label =
"name" in item && typeof item.name === "string" && item.name.trim()
? item.name.trim()
: value;
: getModelLabel(value);
if (!value) return null;
return { label, value } satisfies Objective5ModelOption;
})
.filter((item): item is Objective5ModelOption => item !== null);
}
if (!("models" in payload) || !Array.isArray(payload.models)) {
return [];
}
return payload.models
.map((item) => {
if (!item || typeof item !== "object") return null;
const value =
("model" in item && typeof item.model === "string" && item.model.trim()
? item.model
: "name" in item && typeof item.name === "string"
? item.name
: ""
).trim();
if (!value) return null;
return {
label: getModelLabel(value),
value,
} satisfies Objective5ModelOption;
})
.filter((item): item is Objective5ModelOption => item !== null);
}
export function extractObjective5Metrics(
@@ -600,6 +661,10 @@ export function getDefaultObjective5ModelOptions(): Objective5ModelOption[] {
return [...LAB2_MODEL_OPTIONS];
}
// Return the curated display label for a model tag, or the raw tag itself
// when no curated label is registered.
function getModelLabel(value: string) {
  const curatedLabel = predefinedModelLabels.get(value);
  return curatedLabel === undefined ? value : curatedLabel;
}
function validateSvgNode(node: XmlDomElement): string | null {
if (!allowedSvgElements.has(node.tagName)) {
return `The SVG used a blocked element: <${node.tagName}>.`;