diff --git a/README.md b/README.md index 17dfa45..1b5c92d 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,8 @@ If CUDA is already mounted or preinstalled outside `PATH`, the installer now det - The host-side install path assumes modern local tooling, but TransformerLab itself is provisioned from a pinned classic single-user layout. - TransformerLab is intentionally pinned to the older single-user `v0.28.2` release because newer upstream releases changed the project structure and behavior in ways that break this courseware. - This project does not rely on TransformerLab's upstream `install.sh`; the Ansible role provisions the pinned release directly so web assets, env layout, and runtime behavior stay reproducible. -- The courseware repairs installed TransformerLab Fastchat plugin manifests so Fastchat-gated features such as Model Architecture and Visualize Logprobs stay available on pinned installs. +- The courseware patches the pinned TransformerLab install so plugin file lookups keep working when the TransformerLab home directory is a symlink, and refreshes installed Fastchat plugin manifests so Fastchat-gated features such as Model Architecture, activations, and Visualize Logprobs stay available on pinned installs. +- The managed default TransformerLab student account is also seeded with the courseware Fastchat plugin plus the starter experiments and model metadata that `./labctl up` depends on. - No Ollama models are pulled during `./labctl up`; students pull models manually as part of the courseware. - WhiteRabbitNeo assets are handled separately from `./labctl up` and `./labctl preflight`. - Run `./labctl assets lab2` when you want to populate repo-local lab 2 assets in `assets/lab2/` from Hugging Face. 
diff --git a/ansible/roles/transformerlab/tasks/main.yml b/ansible/roles/transformerlab/tasks/main.yml index 379bcd7..c04dbcd 100644 --- a/ansible/roles/transformerlab/tasks/main.yml +++ b/ansible/roles/transformerlab/tasks/main.yml @@ -233,12 +233,27 @@ dest: "{{ courseware_state_dir }}/repair_transformerlab_plugin_supports.py" mode: "0755" +- name: Install TransformerLab source repair helper + copy: + src: "{{ playbook_dir }}/../../scripts/repair_transformerlab_symlink_paths.py" + dest: "{{ courseware_state_dir }}/repair_transformerlab_symlink_paths.py" + mode: "0755" + - name: Install TransformerLab default-user helper copy: src: "{{ playbook_dir }}/../../scripts/ensure_transformerlab_user.py" dest: "{{ courseware_state_dir }}/ensure_transformerlab_user.py" mode: "0755" +- name: Repair pinned TransformerLab symlink-aware plugin file lookups + command: + argv: + - python3 + - "{{ courseware_state_dir }}/repair_transformerlab_symlink_paths.py" + - --transformerlab-dir + - "{{ courseware_transformerlab_home }}" + changed_when: false + - name: Repair installed Fastchat plugin supports command: argv: diff --git a/scripts/ensure_transformerlab_user.py b/scripts/ensure_transformerlab_user.py index 1989a50..4bf3dba 100644 --- a/scripts/ensure_transformerlab_user.py +++ b/scripts/ensure_transformerlab_user.py @@ -4,9 +4,15 @@ from __future__ import annotations import argparse import asyncio import os +import shutil import sys from pathlib import Path +DEFAULT_WORKSPACE_PLUGINS = ("fastchat_server",) +DEFAULT_WORKSPACE_EXPERIMENTS = ("alpha", "beta", "gamma") +DEFAULT_WORKSPACE_MODELS = ("unsloth_Llama-3.2-1B-Instruct",) +DEFAULT_MODEL_METADATA_FILES = ("_tlab_complete_provenance.json",) + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( @@ -36,6 +42,92 @@ def bootstrap_source(transformerlab_dir: Path) -> None: os.environ.setdefault(key.strip(), value.strip().strip('"').strip("'")) +def target_workspace(transformerlab_dir: Path, team_id: str) -> 
Path: + return transformerlab_dir / "orgs" / team_id / "workspace" + + +def workspace_team_id(workspace: Path, transformerlab_dir: Path) -> str | None: + orgs_dir = transformerlab_dir / "orgs" + try: + relative = workspace.relative_to(orgs_dir) + except ValueError: + return None + + if len(relative.parts) >= 2 and relative.parts[1] == "workspace": + return relative.parts[0] + return None + + +def candidate_workspaces(transformerlab_dir: Path, excluded_team_id: str) -> list[Path]: + candidates: list[Path] = [] + root_workspace = transformerlab_dir / "workspace" + if root_workspace.is_dir(): + candidates.append(root_workspace) + + orgs_dir = transformerlab_dir / "orgs" + if not orgs_dir.is_dir(): + return candidates + + for workspace in sorted(orgs_dir.glob("*/workspace")): + if not workspace.is_dir(): + continue + if workspace_team_id(workspace, transformerlab_dir) == excluded_team_id: + continue + candidates.append(workspace) + return candidates + + +def copy_dir_if_missing(source: Path | None, target: Path, label: str) -> bool: + if source is None or not source.is_dir() or target.exists(): + return False + + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copytree(source, target) + print(f"Seeded {label} from {source}.") + return True + + +def copy_file_if_missing(source: Path | None, target: Path, label: str) -> bool: + if source is None or not source.is_file() or target.exists(): + return False + + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, target) + print(f"Seeded {label} from {source}.") + return True + + +def find_workspace_seed(transformerlab_dir: Path, category: str, name: str, excluded_team_id: str) -> Path | None: + for workspace in candidate_workspaces(transformerlab_dir, excluded_team_id): + candidate = workspace / category / name + if candidate.exists(): + return candidate + return None + + +def seed_workspace(transformerlab_dir: Path, team_id: str) -> None: + workspace = target_workspace(transformerlab_dir, 
team_id) + workspace.mkdir(parents=True, exist_ok=True) + + for plugin in DEFAULT_WORKSPACE_PLUGINS: + source = transformerlab_dir / "src" / "transformerlab" / "plugins" / plugin + copy_dir_if_missing(source, workspace / "plugins" / plugin, f"plugin '{plugin}'") + + for experiment in DEFAULT_WORKSPACE_EXPERIMENTS: + source = find_workspace_seed(transformerlab_dir, "experiments", experiment, team_id) + copy_dir_if_missing(source, workspace / "experiments" / experiment, f"experiment '{experiment}'") + + copied_model = False + for model in DEFAULT_WORKSPACE_MODELS: + source = find_workspace_seed(transformerlab_dir, "models", model, team_id) + copied_model = copy_dir_if_missing(source, workspace / "models" / model, f"model '{model}'") or copied_model + + for metadata_name in DEFAULT_MODEL_METADATA_FILES: + source = find_workspace_seed(transformerlab_dir, "models", metadata_name, team_id) + if copied_model or source is not None: + copy_file_if_missing(source, workspace / "models" / metadata_name, f"model metadata '{metadata_name}'") + + async def ensure_user(args: argparse.Namespace) -> int: from sqlalchemy import select from transformerlab.db.constants import DATABASE_FILE_NAME @@ -129,6 +221,8 @@ async def ensure_user(args: argparse.Namespace) -> int: await session.commit() print(f"Updated team role to owner for {args.email}.") + seed_workspace(Path(args.transformerlab_dir), str(user_team.team_id)) + action = "Created" if created else "Verified" print(f"{action} default TransformerLab user {args.email}.") return 0 diff --git a/scripts/repair_transformerlab_symlink_paths.py b/scripts/repair_transformerlab_symlink_paths.py new file mode 100644 index 0000000..068dd70 --- /dev/null +++ b/scripts/repair_transformerlab_symlink_paths.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +"""Patch pinned TransformerLab source to tolerate symlinked home directories.""" + +from __future__ import annotations + +import argparse +import re +import sys +from pathlib import Path + + 
+PATCH_MARKER = "with symlinked TransformerLab home directories." +TARGET_BLOCK = re.compile( + r"(?P<indent>[ \t]+)# The following prevents path traversal attacks:.*?" + r"(?P=indent)# now get the file contents", + re.DOTALL, +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--transformerlab-dir", required=True) + return parser.parse_args() + + +def repair_plugins_router(path: Path) -> bool: + source = path.read_text(encoding="utf-8") + if PATCH_MARKER in source: + return False + + replacement = ( + " # The following prevents path traversal attacks while remaining compatible\n" + " # with symlinked TransformerLab home directories.\n" + " plugin_dir = Path(await lab_dirs.plugin_dir_by_name((pluginId)))\n" + " resolved_plugin_dir = plugin_dir.resolve()\n" + ' final_path = (plugin_dir / f"{filename}{file_ext}").resolve()\n' + "\n" + " try:\n" + " final_path.relative_to(resolved_plugin_dir)\n" + " except ValueError:\n" + ' return {"message": f"File {filename}{file_ext} is outside plugin directory"}\n' + "\n" + " # now get the file contents" + ) + updated, count = TARGET_BLOCK.subn(replacement, source, count=1) + if count != 1: + raise RuntimeError(f"Could not find path traversal block in {path}") + + path.write_text(updated, encoding="utf-8") + return True + + +def main() -> int: + args = parse_args() + root = Path(args.transformerlab_dir).expanduser().resolve() + plugins_router = root / "src" / "transformerlab" / "routers" / "experiment" / "plugins.py" + if not plugins_router.exists(): + print(f"missing TransformerLab plugins router: {plugins_router}", file=sys.stderr) + return 1 + + changed = repair_plugins_router(plugins_router) + if changed: + print(f"patched {plugins_router}") + else: + print(f"already patched {plugins_router}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/service_manager.sh b/scripts/service_manager.sh index 1f5dd55..a4c6bea 100755 --- 
a/scripts/service_manager.sh +++ b/scripts/service_manager.sh @@ -160,6 +160,8 @@ start_one() { ;; transformerlab) if command -v python3 >/dev/null 2>&1; then + python3 "$SCRIPT_DIR/repair_transformerlab_symlink_paths.py" \ + --transformerlab-dir "$TRANSFORMERLAB_DIR" >>"$STATE_DIR/logs/transformerlab_source_repairs.log" 2>&1 || true python3 "$SCRIPT_DIR/repair_transformerlab_plugin_supports.py" \ --transformerlab-dir "$TRANSFORMERLAB_DIR" \ --plugin "fastchat_server" \