diff --git a/content/labs/lab-2-quantization-tradeoffs.md b/content/labs/lab-2-quantization-tradeoffs.md index 6a7264e..9704280 100644 --- a/content/labs/lab-2-quantization-tradeoffs.md +++ b/content/labs/lab-2-quantization-tradeoffs.md @@ -1,7 +1,7 @@ --- order: 2 title: "Lab 2 - Quantization Tradeoffs: Comparing 2-bit, 4-bit, and 8-bit" -description: Download Gemma 4 E2B in three GGUF quantizations and compare size, metadata, and output quality. +description: Compare Gemma 4 E2B in three Ollama quantizations and study how lower precision changes behavior. --- @@ -10,8 +10,8 @@ description: Download Gemma 4 E2B in three GGUF quantizations and compare size, In this lab, we will: -- Download the same Gemma model in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0` -- Compare file size and GGUF metadata across those quantizations +- Pull the same Gemma model in Q2, Q4, and Q8 Ollama variants +- Compare the quantization labels and model behavior across those variants - Observe how lower precision changes the model's behavior - Build intuition for when a smaller quant may or may not be worth it @@ -23,19 +23,15 @@ In this lab, we will: ## Objective 1: Understand the Model and the Quantizations -For this lab, we will use the Hugging Face repository for **Unsloth's GGUF release of Gemma 4 E2B Instruct**: +For this lab, we will use three Ollama-published variants of **Gemma 4 E2B** that represent distinct precision bands: - +| Precision band | Ollama model tag | Why we are using it | +| -------------- | ----------------------------------- | --------------------------------------- | +| Q2 | `cajina/gemma4_e2b-q2_k_xl:v01` | Most aggressive compression in this lab | +| Q4 | `batiai/gemma4-e2b:q4` | Common middle-ground quant | +| Q8 | `bjoernb/gemma4-e2b-fast:latest` | Highest-quality quant in this lab | -This repository currently exposes multiple GGUF variants of the same base model. We will focus on one file from each of these precision bands: - -| Precision band | GGUF file | Why we are using it | File Size | -| -------------- | ------------------------------ | --------------------------------------- |-----------| -| 2-bit | `gemma-4-E2B-it-UD-IQ2_M.gguf` | Most aggressive compression in this lab | 2.4 GB | -| 4-bit | `gemma-4-E2B-it-Q4_K_M.gguf` | Common middle-ground quant | 3.17 GB | -| 8-bit | `gemma-4-E2B-it-Q8_0.gguf` | Highest-quality quant in this lab | 5.05 GB | - -Even though the filenames differ, these are all the same underlying instruction-tuned Gemma 4 E2B model. The main variable we are changing is how the weights are stored. +Even though the Ollama tags differ, these are all variants of the same underlying Gemma 4 E2B model family. The main variable we are changing is how the weights are stored. When we say these files are the same model, we mean that the overall neural network is still the same: @@ -97,10 +93,11 @@ The viewer below zooms out from one weight and instead shows a toy layer with 16 ### Explore: Compare the same prompts through the hosted chat widget -If your instructor provides an OpenAI-compatible endpoint, you can compare the same prompts through the embedded chat tool below: +By default, the widget below points to the courseware-managed Ollama service and the three Lab 2 model tags above. You can still switch to another endpoint if your instructor provides one. -- Paste the lab endpoint and API key into the settings row -- Switch between `Q8_0`, `Q4_K_M`, and `UD-IQ2_M` +- Use the preloaded managed endpoint or replace it with another compatible endpoint +- Optionally add an API key if your chosen endpoint requires one +- Switch between the configured Q2, Q4, and Q8 Gemma variants - Re-run the same prompt so you can compare coherence, stability, and SVG output - Try a visual prompt such as `Draw a pelican riding a bicycle.` @@ -121,4 +118,4 @@ The important takeaway is not that one quant is always "best." The important tak ## Conclusion -This lab isolates quantization as the main variable. By downloading **Gemma 4 E2B Instruct** in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0`, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against disk usage and resource constraints. +This lab isolates quantization as the main variable. By comparing **Gemma 4 E2B** in Q2, Q4, and Q8 Ollama variants, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against efficiency and resource constraints. diff --git a/src/app/api/lab2/chat/route.ts b/src/app/api/lab2/chat/route.ts index 6931f07..035b522 100644 --- a/src/app/api/lab2/chat/route.ts +++ b/src/app/api/lab2/chat/route.ts @@ -4,9 +4,9 @@ import { extractAssistantTextContent, extractObjective5Metrics, extractSvgMarkup, + getOllamaChatEndpointCandidates, isLocalEndpoint, looksLikeOllamaModel, - normalizeOllamaChatEndpoint, normalizeUpstreamChatEndpoint, sanitizeSvgDocument, type Objective5Message, @@ -50,15 +50,6 @@ export async function POST(request: Request) { ); } - if (!apiKey && !isLocalEndpoint(endpoint)) { - return NextResponse.json( - { - error: "An API key is required for remote endpoints.", - }, - { status: 400 }, - ); - } - if (!model) { return NextResponse.json( { @@ -79,11 +70,11 @@ export async function POST(request: Request) { const useOllamaChat = looksLikeOllamaModel(model); const useLocalOpenAI = !useOllamaChat && isLocalEndpoint(endpoint); - let upstreamUrl: string; + let upstreamCandidates: string[]; try { - upstreamUrl = useOllamaChat - ? normalizeOllamaChatEndpoint(endpoint) - : normalizeUpstreamChatEndpoint(endpoint); + upstreamCandidates = useOllamaChat + ? getOllamaChatEndpointCandidates(endpoint) + : [normalizeUpstreamChatEndpoint(endpoint)]; } catch { return NextResponse.json( { @@ -98,114 +89,129 @@ export async function POST(request: Request) { : useLocalOpenAI ? LOCAL_OPENAI_UPSTREAM_TIMEOUT_MS : OPENAI_UPSTREAM_TIMEOUT_MS; - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), upstreamTimeoutMs); try { - const upstreamResponse = await fetch(upstreamUrl, { - body: JSON.stringify( - useOllamaChat - ? { - messages: buildUpstreamMessages(body.messages), - model, - stream: false, - } - : { - messages: buildUpstreamMessages(body.messages), - model, - stream: false, - temperature: 0.8, - }, - ), - headers: { - ...(apiKey - ? { - Authorization: `Bearer ${apiKey}`, - } - : {}), - "Content-Type": "application/json", + let lastStatus = 502; + let lastMessage = "The chat request could not reach the upstream endpoint."; + + for (const upstreamUrl of upstreamCandidates) { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), upstreamTimeoutMs); + + try { + const upstreamResponse = await fetch(upstreamUrl, { + body: JSON.stringify( + useOllamaChat + ? { + messages: buildUpstreamMessages(body.messages), + model, + stream: false, + } + : { + messages: buildUpstreamMessages(body.messages), + model, + stream: false, + temperature: 0.8, + }, + ), + headers: { + ...(apiKey + ? { + Authorization: `Bearer ${apiKey}`, + } + : {}), + "Content-Type": "application/json", + }, + method: "POST", + signal: controller.signal, + }); + + const responseText = await upstreamResponse.text(); + let parsedBody: unknown = null; + + try { + parsedBody = JSON.parse(responseText); + } catch { + parsedBody = null; + } + + if (!upstreamResponse.ok) { + lastStatus = upstreamResponse.status; + lastMessage = + typeof parsedBody === "object" && + parsedBody !== null && + "error" in parsedBody && + typeof parsedBody.error === "object" && + parsedBody.error !== null && + "message" in parsedBody.error && + typeof parsedBody.error.message === "string" + ? parsedBody.error.message + : `The upstream endpoint returned ${upstreamResponse.status}.`; + continue; + } + + if (!parsedBody || typeof parsedBody !== "object") { + lastStatus = 502; + lastMessage = "The upstream endpoint returned an unreadable response."; + continue; + } + + const content = extractAssistantTextContent(parsedBody); + const metrics = extractObjective5Metrics(parsedBody); + if (!content) { + lastStatus = 502; + lastMessage = "The upstream endpoint returned no assistant content."; + continue; + } + + const svgMarkup = extractSvgMarkup(content); + if (!svgMarkup) { + return NextResponse.json({ + content, + metrics, + renderMode: "text", + role: "assistant", + }); + } + + const sanitizedSvg = sanitizeSvgDocument(svgMarkup); + if (!sanitizedSvg.ok) { + return NextResponse.json({ + content, + error: `${sanitizedSvg.error} Showing the raw response instead.`, + metrics, + renderMode: "text", + role: "assistant", + }); + } + + return NextResponse.json({ + content, + metrics, + renderMode: "svg", + role: "assistant", + svg: sanitizedSvg.svg, + }); + } catch (caughtError) { + if (caughtError instanceof Error && caughtError.name === "AbortError") { + lastStatus = 504; + lastMessage = `The upstream endpoint timed out after ${Math.floor(upstreamTimeoutMs / 1000)} seconds.`; + continue; + } + + lastStatus = 502; + lastMessage = "The chat request could not reach the upstream endpoint."; + } finally { + clearTimeout(timeoutId); + } + } + + return NextResponse.json( + { + error: lastMessage, }, - method: "POST", - signal: controller.signal, - }); - - const responseText = await upstreamResponse.text(); - let parsedBody: unknown = null; - - try { - parsedBody = JSON.parse(responseText); - } catch { - parsedBody = null; - } - - if (!upstreamResponse.ok) { - const message = - typeof parsedBody === "object" && - parsedBody !== null && - "error" in parsedBody && - typeof parsedBody.error === "object" && - parsedBody.error !== null && - "message" in parsedBody.error && - typeof parsedBody.error.message === "string" - ? parsedBody.error.message - : `The upstream endpoint returned ${upstreamResponse.status}.`; - - return NextResponse.json( - { - error: message, - }, - { status: upstreamResponse.status }, - ); - } - - if (!parsedBody || typeof parsedBody !== "object") { - return NextResponse.json( - { - error: "The upstream endpoint returned an unreadable response.", - }, - { status: 502 }, - ); - } - - const content = extractAssistantTextContent(parsedBody); - const metrics = extractObjective5Metrics(parsedBody); - if (!content) { - return NextResponse.json( - { - error: "The upstream endpoint returned no assistant content.", - }, - { status: 502 }, - ); - } - - const svgMarkup = extractSvgMarkup(content); - if (!svgMarkup) { - return NextResponse.json({ - content, - metrics, - renderMode: "text", - role: "assistant", - }); - } - - const sanitizedSvg = sanitizeSvgDocument(svgMarkup); - if (!sanitizedSvg.ok) { - return NextResponse.json({ - content, - error: `${sanitizedSvg.error} Showing the raw response instead.`, - metrics, - renderMode: "text", - role: "assistant", - }); - } - - return NextResponse.json({ - content, - metrics, - renderMode: "svg", - role: "assistant", - svg: sanitizedSvg.svg, - }); + { status: lastStatus }, + ); } catch (caughtError) { if (caughtError instanceof Error && caughtError.name === "AbortError") { return NextResponse.json( @@ -222,7 +228,5 @@ export async function POST(request: Request) { }, { status: 502 }, ); - } finally { - clearTimeout(timeoutId); } } diff --git a/src/app/api/lab2/models/route.ts b/src/app/api/lab2/models/route.ts index ce08683..6c57544 100644 --- a/src/app/api/lab2/models/route.ts +++ b/src/app/api/lab2/models/route.ts @@ -3,7 +3,6 @@ import { extractModelOptions, getDefaultObjective5ModelOptions, getModelListEndpointCandidates, - isLocalEndpoint, } from "~/lib/lab2-chat"; type ModelsRouteRequestBody = { @@ -39,15 +38,6 @@ export async function POST(request: Request) { ); } - if (!apiKey && !isLocalEndpoint(endpoint)) { - return NextResponse.json( - { - error: "An API key is required for remote endpoints.", - }, - { status: 400 }, - ); - } - let candidates: string[]; try { candidates = getModelListEndpointCandidates(endpoint); diff --git a/src/components/labs/LabContent.test.tsx b/src/components/labs/LabContent.test.tsx index 70f884f..4e3204f 100644 --- a/src/components/labs/LabContent.test.tsx +++ b/src/components/labs/LabContent.test.tsx @@ -13,6 +13,13 @@ describe("LabContent", () => { new Response( JSON.stringify({ lab1NetronUrl: "http://127.0.0.1:8338", + lab2OllamaModels: [ + { + label: "Gemma 4 E2B Q2", + value: "cajina/gemma4_e2b-q2_k_xl:v01", + }, + ], + lab2OllamaUrl: "http://127.0.0.1:11434", lab3TerminalUrl: "http://127.0.0.1:7681/wetty", }), { status: 200 }, diff --git a/src/components/labs/Objective5Chat.test.tsx b/src/components/labs/Objective5Chat.test.tsx index 55a067a..825ee96 100644 --- a/src/components/labs/Objective5Chat.test.tsx +++ b/src/components/labs/Objective5Chat.test.tsx @@ -16,11 +16,45 @@ describe("Objective5Chat", () => { const fetchMock = vi.fn(async (input: RequestInfo | URL) => { const url = String(input); + if (url === "/courseware-runtime.json") { + return new Response( + JSON.stringify({ + lab2OllamaModels: [ + { + label: "Gemma 4 E2B Q2", + value: "cajina/gemma4_e2b-q2_k_xl:v01", + }, + { + label: "Gemma 4 E2B Q4", + value: "batiai/gemma4-e2b:q4", + }, + { + label: "Gemma 4 E2B Q8", + value: "bjoernb/gemma4-e2b-fast:latest", + }, + ], + lab2OllamaUrl: "http://127.0.0.1:11434", + }), + { status: 200 }, + ); + } + if (url === "/api/lab2/models") { return { json: async () => ({ models: [ - { label: "LM Studio Qwen", value: "qwen3.5-9b-mlx" }, + { + label: "Gemma 4 E2B Q2", + value: "cajina/gemma4_e2b-q2_k_xl:v01", + }, + { + label: "Gemma 4 E2B Q4", + value: "batiai/gemma4-e2b:q4", + }, + { + label: "Gemma 4 E2B Q8", + value: "bjoernb/gemma4-e2b-fast:latest", + }, { label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE }, ], }), @@ -74,8 +108,12 @@ describe("Objective5Chat", () => { }); it("persists settings updates back to localStorage", async () => { + mockFetch(); + render(); + await screen.findByLabelText("Endpoint"); + fireEvent.change(screen.getByLabelText("Endpoint"), { target: { value: "https://saved.example/api" }, }); @@ -95,7 +133,9 @@ describe("Objective5Chat", () => { target: { value: "http://127.0.0.1:1234" }, }); - expect(await screen.findByRole("option", { name: "LM Studio Qwen" })).toBeInTheDocument(); + expect( + await screen.findByRole("option", { name: "Gemma 4 E2B Q4" }), + ).toBeInTheDocument(); expect(fetchMock).toHaveBeenCalledWith( "/api/lab2/models", expect.objectContaining({ @@ -109,9 +149,6 @@ describe("Objective5Chat", () => { render(); - fireEvent.change(screen.getByLabelText("API key"), { - target: { value: "sk-test" }, - }); fireEvent.change(screen.getByLabelText("Prompt"), { target: { value: "Compare these quantized models." }, }); @@ -129,11 +166,26 @@ describe("Objective5Chat", () => { vi.fn(async (input: RequestInfo | URL) => { const url = String(input); + if (url === "/courseware-runtime.json") { + return new Response( + JSON.stringify({ + lab2OllamaModels: [ + { + label: "Gemma 4 E2B Q4", + value: "batiai/gemma4-e2b:q4", + }, + ], + lab2OllamaUrl: "http://127.0.0.1:11434", + }), + { status: 200 }, + ); + } + if (url === "/api/lab2/models") { return { json: async () => ({ models: [ - { label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" }, + { label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" }, { label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE }, ], }), @@ -160,9 +212,6 @@ describe("Objective5Chat", () => { render(); - fireEvent.change(screen.getByLabelText("API key"), { - target: { value: "sk-test" }, - }); fireEvent.change(screen.getByLabelText("Prompt"), { target: { value: "Draw a pelican riding a bicycle." }, }); @@ -174,4 +223,17 @@ describe("Objective5Chat", () => { expect(screen.getByText("View SVG source")).toBeInTheDocument(); expect(screen.getByText("Tokens/sec 14.4")).toBeInTheDocument(); }); + + it("defaults to the managed Ollama runtime endpoint and models", async () => { + mockFetch(); + + render(); + + expect(await screen.findByLabelText("Endpoint")).toHaveValue( + "http://localhost:11434/", + ); + expect(screen.getByLabelText("Model")).toHaveValue( + "cajina/gemma4_e2b-q2_k_xl:v01", + ); + }); }); diff --git a/src/components/labs/Objective5Chat.tsx b/src/components/labs/Objective5Chat.tsx index 36428ca..2ae09c4 100644 --- a/src/components/labs/Objective5Chat.tsx +++ b/src/components/labs/Objective5Chat.tsx @@ -1,14 +1,18 @@ "use client"; import { FormEvent, useCallback, useEffect, useMemo, useState } from "react"; +import { + fetchCoursewareRuntimeConfig, + normalizeCoursewareRuntimeConfig, +} from "~/lib/courseware-runtime"; import { getActiveModel, getDefaultObjective5ModelOptions, getDefaultObjective5Settings, - isLocalEndpoint, LAB2_CHAT_STORAGE_KEY, LAB2_CUSTOM_MODEL_VALUE, LAB2_DEFAULT_ENDPOINT, + LAB2_LEGACY_DEFAULT_ENDPOINT, type Objective5ModelOption, type Objective5Metrics, type Objective5Message, @@ -93,8 +97,21 @@ function toApiConversation(messages: ChatTurn[]) { } export function Objective5Chat() { - const defaults = useMemo(() => getDefaultObjective5Settings(), []); - const defaultModelOptions = useMemo(() => getDefaultObjective5ModelOptions(), []); + const runtimeDefaults = useMemo(() => normalizeCoursewareRuntimeConfig(), []); + const defaults = useMemo( + () => ({ + ...getDefaultObjective5Settings(), + endpoint: runtimeDefaults.lab2OllamaUrl, + selectedModel: + runtimeDefaults.lab2OllamaModels[0]?.value ?? + getDefaultObjective5Settings().selectedModel, + }), + [runtimeDefaults.lab2OllamaModels, runtimeDefaults.lab2OllamaUrl], + ); + const defaultModelOptions = useMemo( + () => ensureCustomOption(runtimeDefaults.lab2OllamaModels), + [runtimeDefaults.lab2OllamaModels], + ); const [endpoint, setEndpoint] = useState(defaults.endpoint); const [apiKey, setApiKey] = useState(defaults.apiKey); const [selectedModel, setSelectedModel] = useState(defaults.selectedModel); @@ -112,23 +129,70 @@ export function Objective5Chat() { const activeModel = getActiveModel(selectedModel, customModel); useEffect(() => { - try { - const savedSettings = window.localStorage.getItem(LAB2_CHAT_STORAGE_KEY); - if (!savedSettings) { - setHasLoadedSettings(true); + let isCancelled = false; + + async function loadInitialSettings() { + const resolvedRuntime = await fetchCoursewareRuntimeConfig().catch(() => + normalizeCoursewareRuntimeConfig(), + ); + if (isCancelled) { return; } - const parsed = JSON.parse(savedSettings) as Partial; - setEndpoint(parsed.endpoint?.trim() || LAB2_DEFAULT_ENDPOINT); - setApiKey(parsed.apiKey ?? ""); - setSelectedModel(parsed.selectedModel?.trim() || defaults.selectedModel); - setCustomModel(parsed.customModel?.trim() || ""); - } catch { - window.localStorage.removeItem(LAB2_CHAT_STORAGE_KEY); - } finally { - setHasLoadedSettings(true); + const resolvedModelOptions = ensureCustomOption( + resolvedRuntime.lab2OllamaModels, + ); + const resolvedSelectedModel = + resolvedRuntime.lab2OllamaModels[0]?.value ?? defaults.selectedModel; + + setModelOptions(resolvedModelOptions); + + try { + const savedSettings = window.localStorage.getItem(LAB2_CHAT_STORAGE_KEY); + if (!savedSettings) { + setEndpoint(resolvedRuntime.lab2OllamaUrl); + setSelectedModel(resolvedSelectedModel); + setCustomModel(""); + setApiKey(""); + setHasLoadedSettings(true); + return; + } + + const parsed = JSON.parse(savedSettings) as Partial; + const savedEndpoint = parsed.endpoint?.trim(); + const nextEndpoint = + !savedEndpoint || savedEndpoint === LAB2_LEGACY_DEFAULT_ENDPOINT + ? resolvedRuntime.lab2OllamaUrl + : savedEndpoint; + const savedSelectedModel = parsed.selectedModel?.trim(); + const nextSelectedModel = + savedSelectedModel === LAB2_CUSTOM_MODEL_VALUE || + resolvedModelOptions.some( + (option) => option.value === savedSelectedModel, + ) + ? savedSelectedModel || resolvedSelectedModel + : resolvedSelectedModel; + + setEndpoint(nextEndpoint); + setApiKey(parsed.apiKey ?? ""); + setSelectedModel(nextSelectedModel); + setCustomModel(parsed.customModel?.trim() || ""); + } catch { + window.localStorage.removeItem(LAB2_CHAT_STORAGE_KEY); + setEndpoint(resolvedRuntime.lab2OllamaUrl); + setSelectedModel(resolvedSelectedModel); + setCustomModel(""); + setApiKey(""); + } finally { + setHasLoadedSettings(true); + } } + + void loadInitialSettings(); + + return () => { + isCancelled = true; + }; }, [defaults.selectedModel]); useEffect(() => { @@ -154,11 +218,6 @@ export function Objective5Chat() { return; } - if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) { - setModelError("Enter an API key before refreshing remote models."); - return; - } - setIsRefreshingModels(true); setModelError(null); @@ -210,7 +269,6 @@ export function Objective5Chat() { useEffect(() => { if (!hasLoadedSettings) return; if (!endpoint.trim()) return; - if (!apiKey.trim() && !isLocalEndpoint(endpoint.trim())) return; void refreshModels(); }, [apiKey, endpoint, hasLoadedSettings, refreshModels]); @@ -227,11 +285,6 @@ export function Objective5Chat() { return; } - if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) { - setError("Enter an API key before sending a prompt to a remote endpoint."); - return; - } - if (!activeModel) { setError("Choose one of the quantized models or enter a custom model name."); return; @@ -305,7 +358,7 @@ export function Objective5Chat() {

Objective 5 Lab Widget

-

Compare qualitative output with a hosted chat endpoint

+

Compare qualitative output with the managed Ollama endpoint

Switch between quantized models, reuse the same prompt, and ask for text or simple SVG sketches like{" "} @@ -379,9 +432,9 @@ export function Objective5Chat() {

- Settings stay in your browser for this lab only. Available models are - refreshed from the configured endpoint, and changing the model does not - clear the transcript. + This widget starts on the courseware Ollama service by default. You can + still swap the endpoint, add an API key, refresh the available models, + and change models without clearing the transcript.

{modelError ? ( diff --git a/src/lib/courseware-runtime.ts b/src/lib/courseware-runtime.ts index 0c7b507..79d702c 100644 --- a/src/lib/courseware-runtime.ts +++ b/src/lib/courseware-runtime.ts @@ -1,14 +1,38 @@ export const COURSEWARE_RUNTIME_CONFIG_PATH = "/courseware-runtime.json"; export const LAB1_DEFAULT_NETRON_URL = "http://127.0.0.1:8338"; +export const LAB2_DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"; +export const LAB2_DEFAULT_OLLAMA_MODELS = [ + { + label: "Gemma 4 E2B Q2", + value: "cajina/gemma4_e2b-q2_k_xl:v01", + }, + { + label: "Gemma 4 E2B Q4", + value: "batiai/gemma4-e2b:q4", + }, + { + label: "Gemma 4 E2B Q8", + value: "bjoernb/gemma4-e2b-fast:latest", + }, +] as const; export const LAB3_DEFAULT_TERMINAL_PATH = "/wetty"; +export type CoursewareRuntimeModelOption = { + label: string; + value: string; +}; + export type CoursewareRuntimeConfig = { lab1NetronUrl?: string; + lab2OllamaModels?: CoursewareRuntimeModelOption[]; + lab2OllamaUrl?: string; lab3TerminalUrl?: string; }; export type ResolvedCoursewareRuntimeConfig = { lab1NetronUrl: string; + lab2OllamaModels: CoursewareRuntimeModelOption[]; + lab2OllamaUrl: string; lab3TerminalUrl: string; }; @@ -50,6 +74,48 @@ export function getLab1NetronUrl( return rewriteLoopbackHost(trimmedValue, currentHostname); } +export function getLab2OllamaUrl( + envValue?: string, + currentHostname = getCurrentHostname(), +) { + const trimmedValue = envValue?.trim(); + + if (!trimmedValue) { + return rewriteLoopbackHost(LAB2_DEFAULT_OLLAMA_URL, currentHostname); + } + + return rewriteLoopbackHost(trimmedValue, currentHostname); +} + +export function getLab2OllamaModels( + envValue?: CoursewareRuntimeModelOption[], +) { + if (!Array.isArray(envValue) || envValue.length === 0) { + return LAB2_DEFAULT_OLLAMA_MODELS.map((model) => ({ ...model })); + } + + const normalizedModels = envValue + .map((model) => { + const label = model?.label?.trim(); + const value = model?.value?.trim(); + + if (!label || !value) { + return null; + } + + return { label, value } satisfies CoursewareRuntimeModelOption; + }) + .filter( + (model): model is CoursewareRuntimeModelOption => model !== null, + ); + + if (normalizedModels.length === 0) { + return LAB2_DEFAULT_OLLAMA_MODELS.map((model) => ({ ...model })); + } + + return normalizedModels; +} + export function getLab3TerminalPath( envValue?: string, currentHostname = getCurrentHostname(), @@ -73,6 +139,8 @@ export function normalizeCoursewareRuntimeConfig( ): ResolvedCoursewareRuntimeConfig { return { lab1NetronUrl: getLab1NetronUrl(config?.lab1NetronUrl, currentHostname), + lab2OllamaModels: getLab2OllamaModels(config?.lab2OllamaModels), + lab2OllamaUrl: getLab2OllamaUrl(config?.lab2OllamaUrl, currentHostname), lab3TerminalUrl: getLab3TerminalPath( config?.lab3TerminalUrl, currentHostname, diff --git a/src/lib/lab2-chat.test.ts b/src/lib/lab2-chat.test.ts index b723b6a..9954b12 100644 --- a/src/lib/lab2-chat.test.ts +++ b/src/lib/lab2-chat.test.ts @@ -4,6 +4,7 @@ import { extractObjective5Metrics, extractModelOptions, extractSvgMarkup, + getOllamaChatEndpointCandidates, getModelListEndpointCandidates, isLocalEndpoint, normalizeOllamaChatEndpoint, @@ -38,21 +39,31 @@ describe("extractSvgMarkup", () => { }); describe("normalizeOllamaChatEndpoint", () => { - it("appends the ollama chat path to a base api endpoint", () => { - expect(normalizeOllamaChatEndpoint("https://ai.zuccaro.me/api")).toBe( - "https://ai.zuccaro.me/ollama/api/chat", + it("prefers the native ollama chat path for a bare ollama endpoint", () => { + expect(normalizeOllamaChatEndpoint("http://127.0.0.1:11434")).toBe( + "http://127.0.0.1:11434/api/chat", ); }); }); describe("getModelListEndpointCandidates", () => { - it("prefers v1 models for bare local endpoints", () => { + it("tries native ollama tags before openai model listings for bare local endpoints", () => { expect(getModelListEndpointCandidates("http://127.0.0.1:1234")).toEqual([ + "http://127.0.0.1:1234/api/tags", "http://127.0.0.1:1234/v1/models", ]); }); }); +describe("getOllamaChatEndpointCandidates", () => { + it("tries native and proxied ollama chat routes", () => { + expect(getOllamaChatEndpointCandidates("http://127.0.0.1:8080")).toEqual([ + "http://127.0.0.1:8080/api/chat", + "http://127.0.0.1:8080/ollama/api/chat", + ]); + }); +}); + describe("isLocalEndpoint", () => { it("detects localhost endpoints", () => { expect(isLocalEndpoint("http://127.0.0.1:1234")).toBe(true); @@ -115,12 +126,32 @@ describe("extractModelOptions", () => { extractModelOptions({ data: [ { id: "qwen3.5-9b-mlx", object: "model" }, - { id: "gemma4:e2b-it-q4_K_M", name: "Gemma 4 E2B Q4_K_M" }, + { id: "batiai/gemma4-e2b:q4", name: "Gemma 4 E2B Q4" }, ], }), ).toEqual([ { label: "qwen3.5-9b-mlx", value: "qwen3.5-9b-mlx" }, - { label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" }, + { label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" }, + ]); + }); + + it("maps ollama tag payloads into dropdown options", () => { + expect( + extractModelOptions({ + models: [ + { model: "cajina/gemma4_e2b-q2_k_xl:v01" }, + { name: "bjoernb/gemma4-e2b-fast:latest" }, + ], + }), + ).toEqual([ + { + label: "Gemma 4 E2B Q2", + value: "cajina/gemma4_e2b-q2_k_xl:v01", + }, + { + label: "Gemma 4 E2B Q8", + value: "bjoernb/gemma4-e2b-fast:latest", + }, ]); }); }); diff --git a/src/lib/lab2-chat.ts b/src/lib/lab2-chat.ts index 432c67d..d9f5da4 100644 --- a/src/lib/lab2-chat.ts +++ b/src/lib/lab2-chat.ts @@ -3,23 +3,21 @@ import { XMLSerializer, type Element as XmlDomElement, } from "@xmldom/xmldom"; +import { + LAB2_DEFAULT_OLLAMA_MODELS, + LAB2_DEFAULT_OLLAMA_URL, +} from "~/lib/courseware-runtime"; export const LAB2_CHAT_STORAGE_KEY = "lab2-objective5-chat-settings"; -export const LAB2_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api"; +export const LAB2_DEFAULT_ENDPOINT = LAB2_DEFAULT_OLLAMA_URL; +export const LAB2_LEGACY_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api"; export const LAB2_CUSTOM_MODEL_VALUE = "__custom__"; export const LAB2_MAX_CONTEXT_MESSAGES = 10; export const LAB2_MAX_MESSAGE_LENGTH = 4000; export const LAB2_MAX_SVG_LENGTH = 20000; export const LAB2_MODEL_OPTIONS = [ - { - label: "Gemma 4 E2B Q8_0", - value: "gemma4:e2b-it-q8_0", - }, - { - label: "Gemma 4 E2B Q4_K_M", - value: "gemma4:e2b-it-q4_K_M", - }, + ...LAB2_DEFAULT_OLLAMA_MODELS, { label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE, @@ -111,6 +109,10 @@ export type SvgSanitizationResult = | SvgSanitizationFailure | SvgSanitizationSuccess; +const predefinedModelLabels = new Map( + LAB2_DEFAULT_OLLAMA_MODELS.map((model) => [model.value, model.label]), +); + const SVG_NAMESPACE = "http://www.w3.org/2000/svg"; const allowedSvgElements = new Set([ "svg", @@ -246,24 +248,35 @@ export function getModelListEndpointCandidates(endpoint: string) { const url = new URL(endpoint); const trimmedPath = url.pathname.replace(/\/+$/, ""); - if (trimmedPath.endsWith("/models")) { + if ( + trimmedPath.endsWith("/models") || + trimmedPath.endsWith("/api/tags") + ) { url.hash = ""; return [url.toString()]; } const paths = new Set(); - if (trimmedPath.endsWith("/api")) { + if (trimmedPath.endsWith("/ollama/api")) { + paths.add("/ollama/api/tags"); + } else if (trimmedPath.endsWith("/ollama")) { + paths.add("/ollama/api/tags"); + } else if (trimmedPath.endsWith("/api")) { + paths.add("/api/tags"); paths.add("/api/v1/models"); paths.add("/api/models"); } else if (trimmedPath.endsWith("/api/v1")) { + paths.add("/api/tags"); paths.add("/api/v1/models"); paths.add("/api/models"); } else if (trimmedPath.endsWith("/v1")) { paths.add("/v1/models"); } else if (trimmedPath.length === 0) { + paths.add("/api/tags"); paths.add("/v1/models"); } else { + paths.add(`${trimmedPath}/api/tags`); paths.add(`${trimmedPath}/v1/models`); paths.add(`${trimmedPath}/models`); } @@ -277,21 +290,48 @@ export function getModelListEndpointCandidates(endpoint: string) { } export function normalizeOllamaChatEndpoint(endpoint: string) { + return getOllamaChatEndpointCandidates(endpoint)[0]; +} + +export function getOllamaChatEndpointCandidates(endpoint: string) { const url = new URL(endpoint); const trimmedPath = url.pathname.replace(/\/+$/, ""); - if (trimmedPath.endsWith("/ollama/api/chat")) { + if ( + trimmedPath.endsWith("/api/chat") || + trimmedPath.endsWith("/ollama/api/chat") + ) { url.pathname = trimmedPath; - } else if (trimmedPath.endsWith("/api") || trimmedPath.endsWith("/api/v1")) { - url.pathname = "/ollama/api/chat"; - } else if (trimmedPath.length === 0) { - url.pathname = "/ollama/api/chat"; - } else { - url.pathname = `${trimmedPath}/ollama/api/chat`; + url.hash = ""; + return [url.toString()]; } - url.hash = ""; - return url.toString(); + const paths = new Set(); + + if (trimmedPath.endsWith("/ollama/api")) { + paths.add("/ollama/api/chat"); + } else if (trimmedPath.endsWith("/ollama")) { + paths.add("/ollama/api/chat"); + } else if (trimmedPath.endsWith("/api")) { + paths.add("/api/chat"); + paths.add("/ollama/api/chat"); + } else if (trimmedPath.endsWith("/api/v1") || trimmedPath.endsWith("/v1")) { + paths.add("/ollama/api/chat"); + paths.add("/api/chat"); + } else if (trimmedPath.length === 0) { + paths.add("/api/chat"); + paths.add("/ollama/api/chat"); + } else { + paths.add(`${trimmedPath}/api/chat`); + paths.add(`${trimmedPath}/ollama/api/chat`); + } + + return Array.from(paths).map((path) => { + const candidate = new URL(url.toString()); + candidate.pathname = path; + candidate.hash = ""; + return candidate.toString(); + }); } export function looksLikeOllamaModel(model: string) { @@ -373,28 +413,49 @@ export function extractAssistantTextContent(payload: ChatCompletionPayload) { } export function extractModelOptions(payload: unknown): Objective5ModelOption[] { - if ( - !payload || - typeof payload !== "object" || - !("data" in payload) || - !Array.isArray(payload.data) - ) { + if (!payload || typeof payload !== "object") { return []; } - return payload.data + if ("data" in payload && Array.isArray(payload.data)) { + return payload.data + .map((item) => { + if (!item || typeof item !== "object") return null; + + const value = + "id" in item && typeof item.id === "string" ? item.id.trim() : ""; + const label = + "name" in item && typeof item.name === "string" && item.name.trim() + ? item.name.trim() + : getModelLabel(value); + + if (!value) return null; + return { label, value } satisfies Objective5ModelOption; + }) + .filter((item): item is Objective5ModelOption => item !== null); + } + + if (!("models" in payload) || !Array.isArray(payload.models)) { + return []; + } + + return payload.models .map((item) => { if (!item || typeof item !== "object") return null; const value = - "id" in item && typeof item.id === "string" ? item.id.trim() : ""; - const label = - "name" in item && typeof item.name === "string" && item.name.trim() - ? item.name.trim() - : value; + ("model" in item && typeof item.model === "string" && item.model.trim() + ? item.model + : "name" in item && typeof item.name === "string" + ? item.name + : "" + ).trim(); if (!value) return null; - return { label, value } satisfies Objective5ModelOption; + return { + label: getModelLabel(value), + value, + } satisfies Objective5ModelOption; }) .filter((item): item is Objective5ModelOption => item !== null); } @@ -600,6 +661,10 @@ export function getDefaultObjective5ModelOptions(): Objective5ModelOption[] { return [...LAB2_MODEL_OPTIONS]; } +function getModelLabel(value: string) { + return predefinedModelLabels.get(value) ?? value; +} + function validateSvgNode(node: XmlDomElement): string | null { if (!allowedSvgElements.has(node.tagName)) { return `The SVG used a blocked element: <${node.tagName}>.`;