Update
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
---
|
||||
order: 2
|
||||
title: "Lab 2 - Quantization Tradeoffs: Comparing 2-bit, 4-bit, and 8-bit"
|
||||
description: Download Gemma 4 E2B in three GGUF quantizations and compare size, metadata, and output quality.
|
||||
description: Compare Gemma 4 E2B in three Ollama quantizations and study how lower precision changes behavior.
|
||||
---
|
||||
|
||||
<!-- breakout-style: instruction-rails -->
|
||||
@@ -10,8 +10,8 @@ description: Download Gemma 4 E2B in three GGUF quantizations and compare size,
|
||||
|
||||
In this lab, we will:
|
||||
|
||||
- Download the same Gemma model in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0`
|
||||
- Compare file size and GGUF metadata across those quantizations
|
||||
- Pull the same Gemma model in Q2, Q4, and Q8 Ollama variants
|
||||
- Compare the quantization labels and model behavior across those variants
|
||||
- Observe how lower precision changes the model's behavior
|
||||
- Build intuition for when a smaller quant may or may not be worth it
|
||||
|
||||
@@ -23,19 +23,15 @@ In this lab, we will:
|
||||
|
||||
## Objective 1: Understand the Model and the Quantizations
|
||||
|
||||
For this lab, we will use the Hugging Face repository for **Unsloth's GGUF release of Gemma 4 E2B Instruct**:
|
||||
For this lab, we will use three Ollama-published variants of **Gemma 4 E2B** that represent distinct precision bands:
|
||||
|
||||
<https://huggingface.co/unsloth/gemma-4-E2B-it-GGUF>
|
||||
| Precision band | Ollama model tag | Why we are using it |
|
||||
| -------------- | ----------------------------------- | --------------------------------------- |
|
||||
| Q2 | `cajina/gemma4_e2b-q2_k_xl:v01` | Most aggressive compression in this lab |
|
||||
| Q4 | `batiai/gemma4-e2b:q4` | Common middle-ground quant |
|
||||
| Q8 | `bjoernb/gemma4-e2b-fast:latest` | Highest-quality quant in this lab |
|
||||
|
||||
This repository currently exposes multiple GGUF variants of the same base model. We will focus on one file from each of these precision bands:
|
||||
|
||||
| Precision band | GGUF file | Why we are using it | File Size |
|
||||
| -------------- | ------------------------------ | --------------------------------------- |-----------|
|
||||
| 2-bit | `gemma-4-E2B-it-UD-IQ2_M.gguf` | Most aggressive compression in this lab | 2.4 GB |
|
||||
| 4-bit | `gemma-4-E2B-it-Q4_K_M.gguf` | Common middle-ground quant | 3.17 GB |
|
||||
| 8-bit | `gemma-4-E2B-it-Q8_0.gguf` | Highest-quality quant in this lab | 5.05 GB |
|
||||
|
||||
Even though the filenames differ, these are all the same underlying instruction-tuned Gemma 4 E2B model. The main variable we are changing is how the weights are stored.
|
||||
Even though the Ollama tags differ, these are all variants of the same underlying Gemma 4 E2B model family. The main variable we are changing is how the weights are stored.
|
||||
|
||||
When we say these variants are the same model, we mean that the overall neural network is still the same:
|
||||
|
||||
@@ -97,10 +93,11 @@ The viewer below zooms out from one weight and instead shows a toy layer with 16
|
||||
|
||||
### Explore: Compare the same prompts through the hosted chat widget
|
||||
|
||||
If your instructor provides an OpenAI-compatible endpoint, you can compare the same prompts through the embedded chat tool below:
|
||||
By default, the widget below connects to the courseware-managed Ollama service and is preloaded with the three Lab 2 model tags listed above. You can still switch to another endpoint if your instructor provides one.
|
||||
|
||||
- Paste the lab endpoint and API key into the settings row
|
||||
- Switch between `Q8_0`, `Q4_K_M`, and `UD-IQ2_M`
|
||||
- Use the preloaded managed endpoint or replace it with another compatible endpoint
|
||||
- Optionally add an API key if your chosen endpoint requires one
|
||||
- Switch between the configured Q2, Q4, and Q8 Gemma variants
|
||||
- Re-run the same prompt so you can compare coherence, stability, and SVG output
|
||||
- Try a visual prompt such as `Draw a pelican riding a bicycle.`
|
||||
|
||||
@@ -121,4 +118,4 @@ The important takeaway is not that one quant is always "best." The important tak
|
||||
|
||||
## Conclusion
|
||||
|
||||
This lab isolates quantization as the main variable. By downloading **Gemma 4 E2B Instruct** in `UD-IQ2_M`, `Q4_K_M`, and `Q8_0`, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against disk usage and resource constraints.
|
||||
This lab isolates quantization as the main variable. By comparing **Gemma 4 E2B** in Q2, Q4, and Q8 Ollama variants, you can directly observe one of the most important tradeoffs in local inference: balancing model quality against efficiency and resource constraints.
|
||||
|
||||
@@ -4,9 +4,9 @@ import {
|
||||
extractAssistantTextContent,
|
||||
extractObjective5Metrics,
|
||||
extractSvgMarkup,
|
||||
getOllamaChatEndpointCandidates,
|
||||
isLocalEndpoint,
|
||||
looksLikeOllamaModel,
|
||||
normalizeOllamaChatEndpoint,
|
||||
normalizeUpstreamChatEndpoint,
|
||||
sanitizeSvgDocument,
|
||||
type Objective5Message,
|
||||
@@ -50,15 +50,6 @@ export async function POST(request: Request) {
|
||||
);
|
||||
}
|
||||
|
||||
if (!apiKey && !isLocalEndpoint(endpoint)) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "An API key is required for remote endpoints.",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
if (!model) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
@@ -79,11 +70,11 @@ export async function POST(request: Request) {
|
||||
|
||||
const useOllamaChat = looksLikeOllamaModel(model);
|
||||
const useLocalOpenAI = !useOllamaChat && isLocalEndpoint(endpoint);
|
||||
let upstreamUrl: string;
|
||||
let upstreamCandidates: string[];
|
||||
try {
|
||||
upstreamUrl = useOllamaChat
|
||||
? normalizeOllamaChatEndpoint(endpoint)
|
||||
: normalizeUpstreamChatEndpoint(endpoint);
|
||||
upstreamCandidates = useOllamaChat
|
||||
? getOllamaChatEndpointCandidates(endpoint)
|
||||
: [normalizeUpstreamChatEndpoint(endpoint)];
|
||||
} catch {
|
||||
return NextResponse.json(
|
||||
{
|
||||
@@ -98,6 +89,12 @@ export async function POST(request: Request) {
|
||||
: useLocalOpenAI
|
||||
? LOCAL_OPENAI_UPSTREAM_TIMEOUT_MS
|
||||
: OPENAI_UPSTREAM_TIMEOUT_MS;
|
||||
|
||||
try {
|
||||
let lastStatus = 502;
|
||||
let lastMessage = "The chat request could not reach the upstream endpoint.";
|
||||
|
||||
for (const upstreamUrl of upstreamCandidates) {
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), upstreamTimeoutMs);
|
||||
|
||||
@@ -139,7 +136,8 @@ export async function POST(request: Request) {
|
||||
}
|
||||
|
||||
if (!upstreamResponse.ok) {
|
||||
const message =
|
||||
lastStatus = upstreamResponse.status;
|
||||
lastMessage =
|
||||
typeof parsedBody === "object" &&
|
||||
parsedBody !== null &&
|
||||
"error" in parsedBody &&
|
||||
@@ -149,33 +147,21 @@ export async function POST(request: Request) {
|
||||
typeof parsedBody.error.message === "string"
|
||||
? parsedBody.error.message
|
||||
: `The upstream endpoint returned ${upstreamResponse.status}.`;
|
||||
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: message,
|
||||
},
|
||||
{ status: upstreamResponse.status },
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!parsedBody || typeof parsedBody !== "object") {
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "The upstream endpoint returned an unreadable response.",
|
||||
},
|
||||
{ status: 502 },
|
||||
);
|
||||
lastStatus = 502;
|
||||
lastMessage = "The upstream endpoint returned an unreadable response.";
|
||||
continue;
|
||||
}
|
||||
|
||||
const content = extractAssistantTextContent(parsedBody);
|
||||
const metrics = extractObjective5Metrics(parsedBody);
|
||||
if (!content) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "The upstream endpoint returned no assistant content.",
|
||||
},
|
||||
{ status: 502 },
|
||||
);
|
||||
lastStatus = 502;
|
||||
lastMessage = "The upstream endpoint returned no assistant content.";
|
||||
continue;
|
||||
}
|
||||
|
||||
const svgMarkup = extractSvgMarkup(content);
|
||||
@@ -206,6 +192,26 @@ export async function POST(request: Request) {
|
||||
role: "assistant",
|
||||
svg: sanitizedSvg.svg,
|
||||
});
|
||||
} catch (caughtError) {
|
||||
if (caughtError instanceof Error && caughtError.name === "AbortError") {
|
||||
lastStatus = 504;
|
||||
lastMessage = `The upstream endpoint timed out after ${Math.floor(upstreamTimeoutMs / 1000)} seconds.`;
|
||||
continue;
|
||||
}
|
||||
|
||||
lastStatus = 502;
|
||||
lastMessage = "The chat request could not reach the upstream endpoint.";
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
}
|
||||
}
|
||||
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: lastMessage,
|
||||
},
|
||||
{ status: lastStatus },
|
||||
);
|
||||
} catch (caughtError) {
|
||||
if (caughtError instanceof Error && caughtError.name === "AbortError") {
|
||||
return NextResponse.json(
|
||||
@@ -222,7 +228,5 @@ export async function POST(request: Request) {
|
||||
},
|
||||
{ status: 502 },
|
||||
);
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ import {
|
||||
extractModelOptions,
|
||||
getDefaultObjective5ModelOptions,
|
||||
getModelListEndpointCandidates,
|
||||
isLocalEndpoint,
|
||||
} from "~/lib/lab2-chat";
|
||||
|
||||
type ModelsRouteRequestBody = {
|
||||
@@ -39,15 +38,6 @@ export async function POST(request: Request) {
|
||||
);
|
||||
}
|
||||
|
||||
if (!apiKey && !isLocalEndpoint(endpoint)) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "An API key is required for remote endpoints.",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
let candidates: string[];
|
||||
try {
|
||||
candidates = getModelListEndpointCandidates(endpoint);
|
||||
|
||||
@@ -13,6 +13,13 @@ describe("LabContent", () => {
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
lab1NetronUrl: "http://127.0.0.1:8338",
|
||||
lab2OllamaModels: [
|
||||
{
|
||||
label: "Gemma 4 E2B Q2",
|
||||
value: "cajina/gemma4_e2b-q2_k_xl:v01",
|
||||
},
|
||||
],
|
||||
lab2OllamaUrl: "http://127.0.0.1:11434",
|
||||
lab3TerminalUrl: "http://127.0.0.1:7681/wetty",
|
||||
}),
|
||||
{ status: 200 },
|
||||
|
||||
@@ -16,11 +16,45 @@ describe("Objective5Chat", () => {
|
||||
const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
|
||||
const url = String(input);
|
||||
|
||||
if (url === "/courseware-runtime.json") {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
lab2OllamaModels: [
|
||||
{
|
||||
label: "Gemma 4 E2B Q2",
|
||||
value: "cajina/gemma4_e2b-q2_k_xl:v01",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q4",
|
||||
value: "batiai/gemma4-e2b:q4",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q8",
|
||||
value: "bjoernb/gemma4-e2b-fast:latest",
|
||||
},
|
||||
],
|
||||
lab2OllamaUrl: "http://127.0.0.1:11434",
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}
|
||||
|
||||
if (url === "/api/lab2/models") {
|
||||
return {
|
||||
json: async () => ({
|
||||
models: [
|
||||
{ label: "LM Studio Qwen", value: "qwen3.5-9b-mlx" },
|
||||
{
|
||||
label: "Gemma 4 E2B Q2",
|
||||
value: "cajina/gemma4_e2b-q2_k_xl:v01",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q4",
|
||||
value: "batiai/gemma4-e2b:q4",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q8",
|
||||
value: "bjoernb/gemma4-e2b-fast:latest",
|
||||
},
|
||||
{ label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE },
|
||||
],
|
||||
}),
|
||||
@@ -74,8 +108,12 @@ describe("Objective5Chat", () => {
|
||||
});
|
||||
|
||||
it("persists settings updates back to localStorage", async () => {
|
||||
mockFetch();
|
||||
|
||||
render(<Objective5Chat />);
|
||||
|
||||
await screen.findByLabelText("Endpoint");
|
||||
|
||||
fireEvent.change(screen.getByLabelText("Endpoint"), {
|
||||
target: { value: "https://saved.example/api" },
|
||||
});
|
||||
@@ -95,7 +133,9 @@ describe("Objective5Chat", () => {
|
||||
target: { value: "http://127.0.0.1:1234" },
|
||||
});
|
||||
|
||||
expect(await screen.findByRole("option", { name: "LM Studio Qwen" })).toBeInTheDocument();
|
||||
expect(
|
||||
await screen.findByRole("option", { name: "Gemma 4 E2B Q4" }),
|
||||
).toBeInTheDocument();
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
"/api/lab2/models",
|
||||
expect.objectContaining({
|
||||
@@ -109,9 +149,6 @@ describe("Objective5Chat", () => {
|
||||
|
||||
render(<Objective5Chat />);
|
||||
|
||||
fireEvent.change(screen.getByLabelText("API key"), {
|
||||
target: { value: "sk-test" },
|
||||
});
|
||||
fireEvent.change(screen.getByLabelText("Prompt"), {
|
||||
target: { value: "Compare these quantized models." },
|
||||
});
|
||||
@@ -129,11 +166,26 @@ describe("Objective5Chat", () => {
|
||||
vi.fn(async (input: RequestInfo | URL) => {
|
||||
const url = String(input);
|
||||
|
||||
if (url === "/courseware-runtime.json") {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
lab2OllamaModels: [
|
||||
{
|
||||
label: "Gemma 4 E2B Q4",
|
||||
value: "batiai/gemma4-e2b:q4",
|
||||
},
|
||||
],
|
||||
lab2OllamaUrl: "http://127.0.0.1:11434",
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}
|
||||
|
||||
if (url === "/api/lab2/models") {
|
||||
return {
|
||||
json: async () => ({
|
||||
models: [
|
||||
{ label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" },
|
||||
{ label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" },
|
||||
{ label: "Custom model", value: LAB2_CUSTOM_MODEL_VALUE },
|
||||
],
|
||||
}),
|
||||
@@ -160,9 +212,6 @@ describe("Objective5Chat", () => {
|
||||
|
||||
render(<Objective5Chat />);
|
||||
|
||||
fireEvent.change(screen.getByLabelText("API key"), {
|
||||
target: { value: "sk-test" },
|
||||
});
|
||||
fireEvent.change(screen.getByLabelText("Prompt"), {
|
||||
target: { value: "Draw a pelican riding a bicycle." },
|
||||
});
|
||||
@@ -174,4 +223,17 @@ describe("Objective5Chat", () => {
|
||||
expect(screen.getByText("View SVG source")).toBeInTheDocument();
|
||||
expect(screen.getByText("Tokens/sec 14.4")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("defaults to the managed Ollama runtime endpoint and models", async () => {
|
||||
mockFetch();
|
||||
|
||||
render(<Objective5Chat />);
|
||||
|
||||
expect(await screen.findByLabelText("Endpoint")).toHaveValue(
|
||||
"http://localhost:11434/",
|
||||
);
|
||||
expect(screen.getByLabelText("Model")).toHaveValue(
|
||||
"cajina/gemma4_e2b-q2_k_xl:v01",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
"use client";
|
||||
|
||||
import { FormEvent, useCallback, useEffect, useMemo, useState } from "react";
|
||||
import {
|
||||
fetchCoursewareRuntimeConfig,
|
||||
normalizeCoursewareRuntimeConfig,
|
||||
} from "~/lib/courseware-runtime";
|
||||
import {
|
||||
getActiveModel,
|
||||
getDefaultObjective5ModelOptions,
|
||||
getDefaultObjective5Settings,
|
||||
isLocalEndpoint,
|
||||
LAB2_CHAT_STORAGE_KEY,
|
||||
LAB2_CUSTOM_MODEL_VALUE,
|
||||
LAB2_DEFAULT_ENDPOINT,
|
||||
LAB2_LEGACY_DEFAULT_ENDPOINT,
|
||||
type Objective5ModelOption,
|
||||
type Objective5Metrics,
|
||||
type Objective5Message,
|
||||
@@ -93,8 +97,21 @@ function toApiConversation(messages: ChatTurn[]) {
|
||||
}
|
||||
|
||||
export function Objective5Chat() {
|
||||
const defaults = useMemo(() => getDefaultObjective5Settings(), []);
|
||||
const defaultModelOptions = useMemo(() => getDefaultObjective5ModelOptions(), []);
|
||||
const runtimeDefaults = useMemo(() => normalizeCoursewareRuntimeConfig(), []);
|
||||
const defaults = useMemo(
|
||||
() => ({
|
||||
...getDefaultObjective5Settings(),
|
||||
endpoint: runtimeDefaults.lab2OllamaUrl,
|
||||
selectedModel:
|
||||
runtimeDefaults.lab2OllamaModels[0]?.value ??
|
||||
getDefaultObjective5Settings().selectedModel,
|
||||
}),
|
||||
[runtimeDefaults.lab2OllamaModels, runtimeDefaults.lab2OllamaUrl],
|
||||
);
|
||||
const defaultModelOptions = useMemo(
|
||||
() => ensureCustomOption(runtimeDefaults.lab2OllamaModels),
|
||||
[runtimeDefaults.lab2OllamaModels],
|
||||
);
|
||||
const [endpoint, setEndpoint] = useState(defaults.endpoint);
|
||||
const [apiKey, setApiKey] = useState(defaults.apiKey);
|
||||
const [selectedModel, setSelectedModel] = useState(defaults.selectedModel);
|
||||
@@ -112,23 +129,70 @@ export function Objective5Chat() {
|
||||
const activeModel = getActiveModel(selectedModel, customModel);
|
||||
|
||||
useEffect(() => {
|
||||
let isCancelled = false;
|
||||
|
||||
async function loadInitialSettings() {
|
||||
const resolvedRuntime = await fetchCoursewareRuntimeConfig().catch(() =>
|
||||
normalizeCoursewareRuntimeConfig(),
|
||||
);
|
||||
if (isCancelled) {
|
||||
return;
|
||||
}
|
||||
|
||||
const resolvedModelOptions = ensureCustomOption(
|
||||
resolvedRuntime.lab2OllamaModels,
|
||||
);
|
||||
const resolvedSelectedModel =
|
||||
resolvedRuntime.lab2OllamaModels[0]?.value ?? defaults.selectedModel;
|
||||
|
||||
setModelOptions(resolvedModelOptions);
|
||||
|
||||
try {
|
||||
const savedSettings = window.localStorage.getItem(LAB2_CHAT_STORAGE_KEY);
|
||||
if (!savedSettings) {
|
||||
setEndpoint(resolvedRuntime.lab2OllamaUrl);
|
||||
setSelectedModel(resolvedSelectedModel);
|
||||
setCustomModel("");
|
||||
setApiKey("");
|
||||
setHasLoadedSettings(true);
|
||||
return;
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(savedSettings) as Partial<typeof defaults>;
|
||||
setEndpoint(parsed.endpoint?.trim() || LAB2_DEFAULT_ENDPOINT);
|
||||
const savedEndpoint = parsed.endpoint?.trim();
|
||||
const nextEndpoint =
|
||||
!savedEndpoint || savedEndpoint === LAB2_LEGACY_DEFAULT_ENDPOINT
|
||||
? resolvedRuntime.lab2OllamaUrl
|
||||
: savedEndpoint;
|
||||
const savedSelectedModel = parsed.selectedModel?.trim();
|
||||
const nextSelectedModel =
|
||||
savedSelectedModel === LAB2_CUSTOM_MODEL_VALUE ||
|
||||
resolvedModelOptions.some(
|
||||
(option) => option.value === savedSelectedModel,
|
||||
)
|
||||
? savedSelectedModel || resolvedSelectedModel
|
||||
: resolvedSelectedModel;
|
||||
|
||||
setEndpoint(nextEndpoint);
|
||||
setApiKey(parsed.apiKey ?? "");
|
||||
setSelectedModel(parsed.selectedModel?.trim() || defaults.selectedModel);
|
||||
setSelectedModel(nextSelectedModel);
|
||||
setCustomModel(parsed.customModel?.trim() || "");
|
||||
} catch {
|
||||
window.localStorage.removeItem(LAB2_CHAT_STORAGE_KEY);
|
||||
setEndpoint(resolvedRuntime.lab2OllamaUrl);
|
||||
setSelectedModel(resolvedSelectedModel);
|
||||
setCustomModel("");
|
||||
setApiKey("");
|
||||
} finally {
|
||||
setHasLoadedSettings(true);
|
||||
}
|
||||
}
|
||||
|
||||
void loadInitialSettings();
|
||||
|
||||
return () => {
|
||||
isCancelled = true;
|
||||
};
|
||||
}, [defaults.selectedModel]);
|
||||
|
||||
useEffect(() => {
|
||||
@@ -154,11 +218,6 @@ export function Objective5Chat() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) {
|
||||
setModelError("Enter an API key before refreshing remote models.");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsRefreshingModels(true);
|
||||
setModelError(null);
|
||||
|
||||
@@ -210,7 +269,6 @@ export function Objective5Chat() {
|
||||
useEffect(() => {
|
||||
if (!hasLoadedSettings) return;
|
||||
if (!endpoint.trim()) return;
|
||||
if (!apiKey.trim() && !isLocalEndpoint(endpoint.trim())) return;
|
||||
|
||||
void refreshModels();
|
||||
}, [apiKey, endpoint, hasLoadedSettings, refreshModels]);
|
||||
@@ -227,11 +285,6 @@ export function Objective5Chat() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!trimmedKey && !isLocalEndpoint(trimmedEndpoint)) {
|
||||
setError("Enter an API key before sending a prompt to a remote endpoint.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!activeModel) {
|
||||
setError("Choose one of the quantized models or enter a custom model name.");
|
||||
return;
|
||||
@@ -305,7 +358,7 @@ export function Objective5Chat() {
|
||||
<section className="objective5-chat" data-widget-enhanced="true">
|
||||
<div className="objective5-chat__header">
|
||||
<p className="objective5-chat__eyebrow">Objective 5 Lab Widget</p>
|
||||
<h3>Compare qualitative output with a hosted chat endpoint</h3>
|
||||
<h3>Compare qualitative output with the managed Ollama endpoint</h3>
|
||||
<p className="objective5-chat__lede">
|
||||
Switch between quantized models, reuse the same prompt, and ask for
|
||||
text or simple SVG sketches like{" "}
|
||||
@@ -379,9 +432,9 @@ export function Objective5Chat() {
|
||||
</div>
|
||||
|
||||
<p className="objective5-chat__settings-note">
|
||||
Settings stay in your browser for this lab only. Available models are
|
||||
refreshed from the configured endpoint, and changing the model does not
|
||||
clear the transcript.
|
||||
This widget starts on the courseware Ollama service by default. You can
|
||||
still swap the endpoint, add an API key, refresh the available models,
|
||||
and change models without clearing the transcript.
|
||||
</p>
|
||||
|
||||
{modelError ? (
|
||||
|
||||
@@ -1,14 +1,38 @@
|
||||
export const COURSEWARE_RUNTIME_CONFIG_PATH = "/courseware-runtime.json";
|
||||
export const LAB1_DEFAULT_NETRON_URL = "http://127.0.0.1:8338";
|
||||
export const LAB2_DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434";
|
||||
export const LAB2_DEFAULT_OLLAMA_MODELS = [
|
||||
{
|
||||
label: "Gemma 4 E2B Q2",
|
||||
value: "cajina/gemma4_e2b-q2_k_xl:v01",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q4",
|
||||
value: "batiai/gemma4-e2b:q4",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q8",
|
||||
value: "bjoernb/gemma4-e2b-fast:latest",
|
||||
},
|
||||
] as const;
|
||||
export const LAB3_DEFAULT_TERMINAL_PATH = "/wetty";
|
||||
|
||||
export type CoursewareRuntimeModelOption = {
|
||||
label: string;
|
||||
value: string;
|
||||
};
|
||||
|
||||
export type CoursewareRuntimeConfig = {
|
||||
lab1NetronUrl?: string;
|
||||
lab2OllamaModels?: CoursewareRuntimeModelOption[];
|
||||
lab2OllamaUrl?: string;
|
||||
lab3TerminalUrl?: string;
|
||||
};
|
||||
|
||||
export type ResolvedCoursewareRuntimeConfig = {
|
||||
lab1NetronUrl: string;
|
||||
lab2OllamaModels: CoursewareRuntimeModelOption[];
|
||||
lab2OllamaUrl: string;
|
||||
lab3TerminalUrl: string;
|
||||
};
|
||||
|
||||
@@ -50,6 +74,48 @@ export function getLab1NetronUrl(
|
||||
return rewriteLoopbackHost(trimmedValue, currentHostname);
|
||||
}
|
||||
|
||||
/**
 * Resolves the Ollama base URL used by Lab 2.
 *
 * A missing, empty, or whitespace-only `envValue` falls back to
 * `LAB2_DEFAULT_OLLAMA_URL`. The selected URL is then handed to
 * `rewriteLoopbackHost` together with `currentHostname`, and that call's
 * result is returned unchanged.
 *
 * @param envValue - Configured URL, if any; surrounding whitespace is ignored.
 * @param currentHostname - Hostname forwarded to `rewriteLoopbackHost`;
 *   defaults to `getCurrentHostname()`.
 * @returns Whatever `rewriteLoopbackHost` produces for the selected URL.
 */
export function getLab2OllamaUrl(
  envValue?: string,
  currentHostname = getCurrentHostname(),
) {
  const configuredUrl = envValue?.trim();
  // An empty string is falsy, so blank configuration also selects the default.
  const baseUrl = configuredUrl ? configuredUrl : LAB2_DEFAULT_OLLAMA_URL;

  return rewriteLoopbackHost(baseUrl, currentHostname);
}
|
||||
|
||||
/**
 * Normalizes the Lab 2 model list supplied through the courseware runtime
 * config.
 *
 * Each usable entry is reduced to `{ label, value }` with both strings
 * trimmed. Entries that are not objects, or whose `label` / `value` is not a
 * non-blank string, are skipped instead of throwing: the list may come from
 * externally supplied configuration (presumably the JSON served at
 * `COURSEWARE_RUNTIME_CONFIG_PATH`), so its runtime shape is not guaranteed
 * to match the declared type.
 *
 * @param envValue - Configured model options, if any.
 * @returns The normalized options, or fresh copies of
 *   `LAB2_DEFAULT_OLLAMA_MODELS` when `envValue` is not an array, is empty,
 *   or contains no usable entry.
 */
export function getLab2OllamaModels(
  envValue?: CoursewareRuntimeModelOption[],
): CoursewareRuntimeModelOption[] {
  // Copy each default so callers can never mutate the shared constant.
  const cloneDefaults = (): CoursewareRuntimeModelOption[] =>
    LAB2_DEFAULT_OLLAMA_MODELS.map((model) => ({ ...model }));

  if (!Array.isArray(envValue) || envValue.length === 0) {
    return cloneDefaults();
  }

  // Validate as `unknown`: a non-string field would otherwise crash on
  // `.trim()` and take the whole config normalization down with it.
  const entries: readonly unknown[] = envValue;
  const normalizedModels: CoursewareRuntimeModelOption[] = [];

  for (const entry of entries) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }

    const { label, value } = entry as { label?: unknown; value?: unknown };

    if (typeof label !== "string" || typeof value !== "string") {
      continue;
    }

    const trimmedLabel = label.trim();
    const trimmedValue = value.trim();

    if (!trimmedLabel || !trimmedValue) {
      continue;
    }

    normalizedModels.push({ label: trimmedLabel, value: trimmedValue });
  }

  return normalizedModels.length > 0 ? normalizedModels : cloneDefaults();
}
|
||||
|
||||
export function getLab3TerminalPath(
|
||||
envValue?: string,
|
||||
currentHostname = getCurrentHostname(),
|
||||
@@ -73,6 +139,8 @@ export function normalizeCoursewareRuntimeConfig(
|
||||
): ResolvedCoursewareRuntimeConfig {
|
||||
return {
|
||||
lab1NetronUrl: getLab1NetronUrl(config?.lab1NetronUrl, currentHostname),
|
||||
lab2OllamaModels: getLab2OllamaModels(config?.lab2OllamaModels),
|
||||
lab2OllamaUrl: getLab2OllamaUrl(config?.lab2OllamaUrl, currentHostname),
|
||||
lab3TerminalUrl: getLab3TerminalPath(
|
||||
config?.lab3TerminalUrl,
|
||||
currentHostname,
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
extractObjective5Metrics,
|
||||
extractModelOptions,
|
||||
extractSvgMarkup,
|
||||
getOllamaChatEndpointCandidates,
|
||||
getModelListEndpointCandidates,
|
||||
isLocalEndpoint,
|
||||
normalizeOllamaChatEndpoint,
|
||||
@@ -38,21 +39,31 @@ describe("extractSvgMarkup", () => {
|
||||
});
|
||||
|
||||
describe("normalizeOllamaChatEndpoint", () => {
|
||||
it("appends the ollama chat path to a base api endpoint", () => {
|
||||
expect(normalizeOllamaChatEndpoint("https://ai.zuccaro.me/api")).toBe(
|
||||
"https://ai.zuccaro.me/ollama/api/chat",
|
||||
it("prefers the native ollama chat path for a bare ollama endpoint", () => {
|
||||
expect(normalizeOllamaChatEndpoint("http://127.0.0.1:11434")).toBe(
|
||||
"http://127.0.0.1:11434/api/chat",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getModelListEndpointCandidates", () => {
|
||||
it("prefers v1 models for bare local endpoints", () => {
|
||||
it("tries native ollama tags before openai model listings for bare local endpoints", () => {
|
||||
expect(getModelListEndpointCandidates("http://127.0.0.1:1234")).toEqual([
|
||||
"http://127.0.0.1:1234/api/tags",
|
||||
"http://127.0.0.1:1234/v1/models",
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
// A bare host should yield the native Ollama chat route first, followed by
// the proxied `/ollama` route.
describe("getOllamaChatEndpointCandidates", () => {
  it("tries native and proxied ollama chat routes", () => {
    const candidates = getOllamaChatEndpointCandidates("http://127.0.0.1:8080");
    const expectedCandidates = [
      "http://127.0.0.1:8080/api/chat",
      "http://127.0.0.1:8080/ollama/api/chat",
    ];

    expect(candidates).toEqual(expectedCandidates);
  });
});
|
||||
|
||||
describe("isLocalEndpoint", () => {
|
||||
it("detects localhost endpoints", () => {
|
||||
expect(isLocalEndpoint("http://127.0.0.1:1234")).toBe(true);
|
||||
@@ -115,12 +126,32 @@ describe("extractModelOptions", () => {
|
||||
extractModelOptions({
|
||||
data: [
|
||||
{ id: "qwen3.5-9b-mlx", object: "model" },
|
||||
{ id: "gemma4:e2b-it-q4_K_M", name: "Gemma 4 E2B Q4_K_M" },
|
||||
{ id: "batiai/gemma4-e2b:q4", name: "Gemma 4 E2B Q4" },
|
||||
],
|
||||
}),
|
||||
).toEqual([
|
||||
{ label: "qwen3.5-9b-mlx", value: "qwen3.5-9b-mlx" },
|
||||
{ label: "Gemma 4 E2B Q4_K_M", value: "gemma4:e2b-it-q4_K_M" },
|
||||
{ label: "Gemma 4 E2B Q4", value: "batiai/gemma4-e2b:q4" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("maps ollama tag payloads into dropdown options", () => {
|
||||
expect(
|
||||
extractModelOptions({
|
||||
models: [
|
||||
{ model: "cajina/gemma4_e2b-q2_k_xl:v01" },
|
||||
{ name: "bjoernb/gemma4-e2b-fast:latest" },
|
||||
],
|
||||
}),
|
||||
).toEqual([
|
||||
{
|
||||
label: "Gemma 4 E2B Q2",
|
||||
value: "cajina/gemma4_e2b-q2_k_xl:v01",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q8",
|
||||
value: "bjoernb/gemma4-e2b-fast:latest",
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
+92
-27
@@ -3,23 +3,21 @@ import {
|
||||
XMLSerializer,
|
||||
type Element as XmlDomElement,
|
||||
} from "@xmldom/xmldom";
|
||||
import {
|
||||
LAB2_DEFAULT_OLLAMA_MODELS,
|
||||
LAB2_DEFAULT_OLLAMA_URL,
|
||||
} from "~/lib/courseware-runtime";
|
||||
|
||||
export const LAB2_CHAT_STORAGE_KEY = "lab2-objective5-chat-settings";
|
||||
export const LAB2_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api";
|
||||
export const LAB2_DEFAULT_ENDPOINT = LAB2_DEFAULT_OLLAMA_URL;
|
||||
export const LAB2_LEGACY_DEFAULT_ENDPOINT = "https://ai.zuccaro.me/api";
|
||||
export const LAB2_CUSTOM_MODEL_VALUE = "__custom__";
|
||||
export const LAB2_MAX_CONTEXT_MESSAGES = 10;
|
||||
export const LAB2_MAX_MESSAGE_LENGTH = 4000;
|
||||
export const LAB2_MAX_SVG_LENGTH = 20000;
|
||||
|
||||
export const LAB2_MODEL_OPTIONS = [
|
||||
{
|
||||
label: "Gemma 4 E2B Q8_0",
|
||||
value: "gemma4:e2b-it-q8_0",
|
||||
},
|
||||
{
|
||||
label: "Gemma 4 E2B Q4_K_M",
|
||||
value: "gemma4:e2b-it-q4_K_M",
|
||||
},
|
||||
...LAB2_DEFAULT_OLLAMA_MODELS,
|
||||
{
|
||||
label: "Custom model",
|
||||
value: LAB2_CUSTOM_MODEL_VALUE,
|
||||
@@ -111,6 +109,10 @@ export type SvgSanitizationResult =
|
||||
| SvgSanitizationFailure
|
||||
| SvgSanitizationSuccess;
|
||||
|
||||
const predefinedModelLabels = new Map<string, string>(
|
||||
LAB2_DEFAULT_OLLAMA_MODELS.map((model) => [model.value, model.label]),
|
||||
);
|
||||
|
||||
const SVG_NAMESPACE = "http://www.w3.org/2000/svg";
|
||||
const allowedSvgElements = new Set([
|
||||
"svg",
|
||||
@@ -246,24 +248,35 @@ export function getModelListEndpointCandidates(endpoint: string) {
|
||||
const url = new URL(endpoint);
|
||||
const trimmedPath = url.pathname.replace(/\/+$/, "");
|
||||
|
||||
if (trimmedPath.endsWith("/models")) {
|
||||
if (
|
||||
trimmedPath.endsWith("/models") ||
|
||||
trimmedPath.endsWith("/api/tags")
|
||||
) {
|
||||
url.hash = "";
|
||||
return [url.toString()];
|
||||
}
|
||||
|
||||
const paths = new Set<string>();
|
||||
|
||||
if (trimmedPath.endsWith("/api")) {
|
||||
if (trimmedPath.endsWith("/ollama/api")) {
|
||||
paths.add("/ollama/api/tags");
|
||||
} else if (trimmedPath.endsWith("/ollama")) {
|
||||
paths.add("/ollama/api/tags");
|
||||
} else if (trimmedPath.endsWith("/api")) {
|
||||
paths.add("/api/tags");
|
||||
paths.add("/api/v1/models");
|
||||
paths.add("/api/models");
|
||||
} else if (trimmedPath.endsWith("/api/v1")) {
|
||||
paths.add("/api/tags");
|
||||
paths.add("/api/v1/models");
|
||||
paths.add("/api/models");
|
||||
} else if (trimmedPath.endsWith("/v1")) {
|
||||
paths.add("/v1/models");
|
||||
} else if (trimmedPath.length === 0) {
|
||||
paths.add("/api/tags");
|
||||
paths.add("/v1/models");
|
||||
} else {
|
||||
paths.add(`${trimmedPath}/api/tags`);
|
||||
paths.add(`${trimmedPath}/v1/models`);
|
||||
paths.add(`${trimmedPath}/models`);
|
||||
}
|
||||
@@ -277,21 +290,48 @@ export function getModelListEndpointCandidates(endpoint: string) {
|
||||
}
|
||||
|
||||
/**
 * Returns the preferred Ollama chat URL for an endpoint.
 *
 * This is the first entry produced by `getOllamaChatEndpointCandidates`.
 * Nothing is caught here, so any error that helper throws propagates to the
 * caller.
 *
 * @param endpoint - Endpoint URL forwarded to `getOllamaChatEndpointCandidates`.
 * @returns The first candidate URL.
 */
export function normalizeOllamaChatEndpoint(endpoint: string) {
  const [preferredCandidate] = getOllamaChatEndpointCandidates(endpoint);

  return preferredCandidate;
}
|
||||
|
||||
export function getOllamaChatEndpointCandidates(endpoint: string) {
|
||||
const url = new URL(endpoint);
|
||||
const trimmedPath = url.pathname.replace(/\/+$/, "");
|
||||
|
||||
if (trimmedPath.endsWith("/ollama/api/chat")) {
|
||||
if (
|
||||
trimmedPath.endsWith("/api/chat") ||
|
||||
trimmedPath.endsWith("/ollama/api/chat")
|
||||
) {
|
||||
url.pathname = trimmedPath;
|
||||
} else if (trimmedPath.endsWith("/api") || trimmedPath.endsWith("/api/v1")) {
|
||||
url.pathname = "/ollama/api/chat";
|
||||
} else if (trimmedPath.length === 0) {
|
||||
url.pathname = "/ollama/api/chat";
|
||||
} else {
|
||||
url.pathname = `${trimmedPath}/ollama/api/chat`;
|
||||
url.hash = "";
|
||||
return [url.toString()];
|
||||
}
|
||||
|
||||
url.hash = "";
|
||||
return url.toString();
|
||||
const paths = new Set<string>();
|
||||
|
||||
if (trimmedPath.endsWith("/ollama/api")) {
|
||||
paths.add("/ollama/api/chat");
|
||||
} else if (trimmedPath.endsWith("/ollama")) {
|
||||
paths.add("/ollama/api/chat");
|
||||
} else if (trimmedPath.endsWith("/api")) {
|
||||
paths.add("/api/chat");
|
||||
paths.add("/ollama/api/chat");
|
||||
} else if (trimmedPath.endsWith("/api/v1") || trimmedPath.endsWith("/v1")) {
|
||||
paths.add("/ollama/api/chat");
|
||||
paths.add("/api/chat");
|
||||
} else if (trimmedPath.length === 0) {
|
||||
paths.add("/api/chat");
|
||||
paths.add("/ollama/api/chat");
|
||||
} else {
|
||||
paths.add(`${trimmedPath}/api/chat`);
|
||||
paths.add(`${trimmedPath}/ollama/api/chat`);
|
||||
}
|
||||
|
||||
return Array.from(paths).map((path) => {
|
||||
const candidate = new URL(url.toString());
|
||||
candidate.pathname = path;
|
||||
candidate.hash = "";
|
||||
return candidate.toString();
|
||||
});
|
||||
}
|
||||
|
||||
export function looksLikeOllamaModel(model: string) {
|
||||
@@ -373,15 +413,11 @@ export function extractAssistantTextContent(payload: ChatCompletionPayload) {
|
||||
}
|
||||
|
||||
export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
|
||||
if (
|
||||
!payload ||
|
||||
typeof payload !== "object" ||
|
||||
!("data" in payload) ||
|
||||
!Array.isArray(payload.data)
|
||||
) {
|
||||
if (!payload || typeof payload !== "object") {
|
||||
return [];
|
||||
}
|
||||
|
||||
if ("data" in payload && Array.isArray(payload.data)) {
|
||||
return payload.data
|
||||
.map((item) => {
|
||||
if (!item || typeof item !== "object") return null;
|
||||
@@ -391,7 +427,7 @@ export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
|
||||
const label =
|
||||
"name" in item && typeof item.name === "string" && item.name.trim()
|
||||
? item.name.trim()
|
||||
: value;
|
||||
: getModelLabel(value);
|
||||
|
||||
if (!value) return null;
|
||||
return { label, value } satisfies Objective5ModelOption;
|
||||
@@ -399,6 +435,31 @@ export function extractModelOptions(payload: unknown): Objective5ModelOption[] {
|
||||
.filter((item): item is Objective5ModelOption => item !== null);
|
||||
}
|
||||
|
||||
if (!("models" in payload) || !Array.isArray(payload.models)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return payload.models
|
||||
.map((item) => {
|
||||
if (!item || typeof item !== "object") return null;
|
||||
|
||||
const value =
|
||||
("model" in item && typeof item.model === "string" && item.model.trim()
|
||||
? item.model
|
||||
: "name" in item && typeof item.name === "string"
|
||||
? item.name
|
||||
: ""
|
||||
).trim();
|
||||
|
||||
if (!value) return null;
|
||||
return {
|
||||
label: getModelLabel(value),
|
||||
value,
|
||||
} satisfies Objective5ModelOption;
|
||||
})
|
||||
.filter((item): item is Objective5ModelOption => item !== null);
|
||||
}
|
||||
|
||||
export function extractObjective5Metrics(
|
||||
payload: ChatCompletionPayload,
|
||||
): Objective5Metrics | null {
|
||||
@@ -600,6 +661,10 @@ export function getDefaultObjective5ModelOptions(): Objective5ModelOption[] {
|
||||
return [...LAB2_MODEL_OPTIONS];
|
||||
}
|
||||
|
||||
/**
 * Maps a model identifier to its display label.
 *
 * @param value - Model identifier to look up in `predefinedModelLabels`.
 * @returns The registered label, or `value` itself when none is registered.
 */
function getModelLabel(value: string) {
  const knownLabel = predefinedModelLabels.get(value);

  if (knownLabel != null) {
    return knownLabel;
  }

  return value;
}
|
||||
|
||||
function validateSvgNode(node: XmlDomElement): string | null {
|
||||
if (!allowedSvgElements.has(node.tagName)) {
|
||||
return `The SVG used a blocked element: <${node.tagName}>.`;
|
||||
|
||||
Reference in New Issue
Block a user