Files
LLM-Labs-Local/scripts/bootstrap_lab2_assets.sh
2026-03-31 19:46:14 -06:00

302 lines
7.7 KiB
Bash

#!/usr/bin/env bash
# Strict mode: abort on unhandled errors, unset variables, and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Repository root: the parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Where the lab 2 assets live inside the repository.
LAB2_ASSETS_DIR="${ROOT_DIR}/assets/lab2"

# Base model repository: remote URL, ref fetched on refresh, and local checkout path.
BASE_REPO_URL='https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B'
BASE_REPO_REF='main'
BASE_REPO_DIR="${LAB2_ASSETS_DIR}/WhiteRabbitNeo-V3-7B"

# Directory that receives the GGUF downloads.
GGUF_DIR="${LAB2_ASSETS_DIR}/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF"

# Set to 1 by parse_args when --refresh is given.
REFRESH_DOWNLOADS=0

# GGUF files to download; the local file name is derived from the last path
# segment of each URL with the query string removed (see download_ggufs).
declare -a GGUF_URLS=(
  'https://huggingface.co/bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/resolve/main/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-Q4_K_M.gguf?download=true'
  'https://huggingface.co/bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/resolve/main/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-Q8_0.gguf?download=true'
  'https://huggingface.co/bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/resolve/main/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-IQ2_M.gguf?download=true'
)

# Per-outcome buckets filled by the record_* helpers and printed by print_summary.
declare -a CLONED_ITEMS=()
declare -a REFRESHED_ITEMS=()
declare -a DOWNLOADED_ITEMS=()
declare -a SKIPPED_ITEMS=()
declare -a FAILED_ITEMS=()
# Print the command-line help text to stdout, one line per printf argument.
usage() {
  printf '%s\n' \
    'Usage: ./labctl assets lab2 [--refresh]' \
    'Populate repo-local lab 2 assets from Hugging Face without touching `up` or `preflight`.' \
    'Actions:' \
    '- Clone or refresh WhiteRabbitNeo-V3-7B into assets/lab2/WhiteRabbitNeo-V3-7B' \
    '- Download the supported GGUF files into assets/lab2/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF' \
    'Options:' \
    '--refresh Re-download GGUF files even if matching files already exist' \
    '-h, --help Show this help text'
}
# Append one entry to the FAILED_ITEMS summary bucket.
# Arguments: $1 - label of the item that failed
record_failure() {
  local failed_item=$1
  FAILED_ITEMS+=("$failed_item")
}
# Append one entry to the SKIPPED_ITEMS summary bucket.
# Arguments: $1 - label of the item that was skipped
record_skip() {
  local skipped_item=$1
  SKIPPED_ITEMS+=("$skipped_item")
}
# Append one entry to the CLONED_ITEMS summary bucket.
# Arguments: $1 - label of the item that was cloned
record_clone() {
  local cloned_item=$1
  CLONED_ITEMS+=("$cloned_item")
}
# Append one entry to the REFRESHED_ITEMS summary bucket.
# Arguments: $1 - label of the item that was refreshed
record_refresh() {
  local refreshed_item=$1
  REFRESHED_ITEMS+=("$refreshed_item")
}
# Append one entry to the DOWNLOADED_ITEMS summary bucket.
# Arguments: $1 - label of the item that was downloaded
record_download() {
  local downloaded_item=$1
  DOWNLOADED_ITEMS+=("$downloaded_item")
}
# Print a report of what this run did, one section per non-empty result bucket.
# Globals: CLONED_ITEMS, REFRESHED_ITEMS, DOWNLOADED_ITEMS, SKIPPED_ITEMS,
#          FAILED_ITEMS (all read)
# Outputs: the report on stdout, preceded by a blank line
print_summary() {
  printf '\n'
  printf '%s\n' "Lab 2 asset bootstrap summary:"

  # Each bucket is expanded only after its length check, so an empty array is
  # never expanded while `set -u` is active.
  if ((${#CLONED_ITEMS[@]} > 0)); then
    printf '%s\n' " Cloned:"
    printf ' - %s\n' "${CLONED_ITEMS[@]}"
  fi

  if ((${#REFRESHED_ITEMS[@]} > 0)); then
    printf '%s\n' " Refreshed:"
    printf ' - %s\n' "${REFRESHED_ITEMS[@]}"
  fi

  if ((${#DOWNLOADED_ITEMS[@]} > 0)); then
    printf '%s\n' " Downloaded:"
    printf ' - %s\n' "${DOWNLOADED_ITEMS[@]}"
  fi

  if ((${#SKIPPED_ITEMS[@]} > 0)); then
    printf '%s\n' " Skipped:"
    printf ' - %s\n' "${SKIPPED_ITEMS[@]}"
  fi

  if ((${#FAILED_ITEMS[@]} > 0)); then
    printf '%s\n' " Failed:"
    printf ' - %s\n' "${FAILED_ITEMS[@]}"
  fi
}
# Print the URL with a single trailing slash removed, if present.
# Arguments: $1 - URL to normalize
# Outputs:   the normalized URL on stdout, without a trailing newline
normalize_url() {
  local raw_url=$1
  printf '%s' "${raw_url%/}"
}
# Exit the script with status 1 unless the named command can be found.
# Arguments: $1 - command name to look up with `command -v`
# Outputs:   an error message on stderr when the command is missing
require_cmd() {
  local tool=$1

  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi

  printf '%s\n' "Missing required command: $tool" >&2
  exit 1
}
# Exit the script with status 1 unless `git lfs version` succeeds.
# Outputs: an error message on stderr when git-lfs is unavailable
require_git_lfs() {
  if git lfs version >/dev/null 2>&1; then
    return 0
  fi

  printf '%s\n' "Missing required command: git-lfs" >&2
  exit 1
}
# Interpret the command-line options.
# Globals:   REFRESH_DOWNLOADS (set to 1 by --refresh)
# Arguments: the script's command-line arguments
# Exits:     0 after printing help for -h/--help; 1 on an unknown option
parse_args() {
  local arg

  # Every option is a single word, so a plain walk over the arguments suffices.
  for arg in "$@"; do
    case "$arg" in
      --refresh)
        REFRESH_DOWNLOADS=1
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $arg" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}
# Print the size of a file in bytes.
# Tries the `stat -c '%s'` form first and falls back to `stat -f '%z'` when
# that invocation fails.
# Arguments: $1 - path of the file to measure
# Outputs:   the size on stdout
# Returns:   0 on success, otherwise the status of the fallback stat call
file_size() {
  local target=$1
  local byte_count

  if byte_count=$(stat -c '%s' "$target" 2>/dev/null); then
    printf '%s\n' "$byte_count"
    return 0
  fi

  stat -f '%z' "$target"
}
# Print the Content-Length reported for a URL by a HEAD request.
#
# Redirects are followed, so curl prints the headers of every hop. Only the
# headers of the final response are considered: the value is reset at each
# HTTP status line, so a redirect body's length is never mistaken for the
# file size. The header name is matched case-insensitively and whitespace
# around the value is optional.
#
# Arguments: $1 - URL to probe
# Outputs:   the length in bytes on stdout; nothing when curl fails or the
#            final response carries no numeric Content-Length
# Returns:   curl's exit status when the request fails, otherwise 0
remote_content_length() {
  local response_headers

  # Headers from a failed request do not describe the file, so emit nothing.
  response_headers=$(curl -fsSI -L "$1") || return

  printf '%s\n' "$response_headers" \
    | tr -d '\r' \
    | awk '
        /^HTTP\// { len = ""; next }
        {
          sep = index($0, ":")
          if (sep == 0) next
          if (tolower(substr($0, 1, sep - 1)) != "content-length") next
          value = substr($0, sep + 1)
          gsub(/^[ \t]+|[ \t]+$/, "", value)
          len = value
        }
        END { if (len ~ /^[0-9]+$/) print len }
      '
}
# Create the lab 2 asset directory and the GGUF download directory, including
# any missing parents.
# Globals: LAB2_ASSETS_DIR, GGUF_DIR (read)
# Returns: the exit status of mkdir
ensure_parent_dirs() {
  local -a required_dirs=("$LAB2_ASSETS_DIR" "$GGUF_DIR")
  mkdir -p "${required_dirs[@]}"
}
# Decide whether BASE_REPO_DIR may be cloned into or refreshed.
#
# Accepts a path that does not exist yet, or an existing git checkout whose
# `origin` remote matches BASE_REPO_URL (compared after normalize_url) and
# whose working tree has no local changes or untracked files.
#
# Globals: BASE_REPO_DIR, BASE_REPO_URL (read)
# Outputs: an explanation on stderr when the checkout is rejected
# Returns: 0 when the path is usable, 1 otherwise
ensure_expected_repo_checkout() {
  local origin_url
  local origin_trimmed
  local expected_trimmed
  local pending_changes

  if [[ ! -d "$BASE_REPO_DIR/.git" ]]; then
    # Nothing there yet: the caller is free to clone.
    if [[ ! -e "$BASE_REPO_DIR" ]]; then
      return 0
    fi
    printf '%s\n' \
      "Refusing to reuse $BASE_REPO_DIR because it exists but is not a git checkout." \
      "Move it aside or remove it, then rerun ./labctl assets lab2." >&2
    return 1
  fi

  # A missing origin remote leaves origin_url empty, which is rejected below.
  origin_url=$(git -C "$BASE_REPO_DIR" remote get-url origin 2>/dev/null || true)
  origin_trimmed=$(normalize_url "$origin_url")
  expected_trimmed=$(normalize_url "$BASE_REPO_URL")
  if [[ -z "$origin_url" || "$origin_trimmed" != "$expected_trimmed" ]]; then
    printf '%s\n' \
      "Refusing to reuse $BASE_REPO_DIR because its origin remote is unexpected." \
      "Expected: $BASE_REPO_URL" \
      "Found: ${origin_url:-<missing>}" \
      "Move it aside or remove it, then rerun ./labctl assets lab2." >&2
    return 1
  fi

  # Only the output matters here; a failing `git status` is treated as "clean".
  pending_changes=$(git -C "$BASE_REPO_DIR" status --porcelain --untracked-files=all) || true
  if [[ -n "$pending_changes" ]]; then
    printf '%s\n' \
      "Refusing to refresh $BASE_REPO_DIR because it has local changes." \
      "Commit, stash, or remove that checkout first, then rerun ./labctl assets lab2." >&2
    return 1
  fi

  return 0
}
# Clone the base model repository, or refresh an existing checkout, then pull
# its Git LFS objects.
#
# main calls this function as the condition of an `if`, a context in which
# bash ignores `set -e` for everything the function runs. Every git step is
# therefore checked explicitly: a failed clone or fetch is recorded as a
# failure instead of being reported as "Cloned"/"Refreshed", and the function
# stops at the first failing step.
#
# Globals: BASE_REPO_DIR, BASE_REPO_URL, BASE_REPO_REF (read); the summary
#          buckets are updated through record_clone, record_refresh and
#          record_failure
# Outputs: progress messages on stdout, error messages on stderr
# Returns: 0 when the checkout and its LFS objects are up to date, 1 otherwise
prepare_base_repo() {
  local item="assets/lab2/WhiteRabbitNeo-V3-7B"

  if ! ensure_expected_repo_checkout; then
    record_failure "WhiteRabbitNeo-V3-7B checkout"
    return 1
  fi

  if ! git lfs install --skip-repo >/dev/null; then
    echo "Failed to set up git-lfs (git lfs install --skip-repo)." >&2
    record_failure "$item (git lfs install)"
    return 1
  fi

  if [ ! -d "$BASE_REPO_DIR/.git" ]; then
    echo "Cloning WhiteRabbitNeo-V3-7B into $BASE_REPO_DIR"
    # Skip the LFS smudge during clone; the objects are fetched by `lfs pull` below.
    if ! GIT_LFS_SKIP_SMUDGE=1 git clone --depth=1 "$BASE_REPO_URL" "$BASE_REPO_DIR"; then
      echo "Failed to clone $BASE_REPO_URL into $BASE_REPO_DIR." >&2
      record_failure "$item (git clone)"
      return 1
    fi
    record_clone "$item"
  else
    echo "Refreshing WhiteRabbitNeo-V3-7B in $BASE_REPO_DIR"
    if ! git -C "$BASE_REPO_DIR" fetch --depth=1 origin "$BASE_REPO_REF"; then
      echo "Failed to fetch $BASE_REPO_REF from origin in $BASE_REPO_DIR." >&2
      record_failure "$item (git fetch)"
      return 1
    fi
    # Only reached after a successful fetch, so FETCH_HEAD is never stale here.
    if ! git -C "$BASE_REPO_DIR" checkout -f --detach FETCH_HEAD; then
      echo "Failed to check out the fetched commit in $BASE_REPO_DIR." >&2
      record_failure "$item (git checkout)"
      return 1
    fi
    record_refresh "$item"
  fi

  if ! git -C "$BASE_REPO_DIR" lfs install --local >/dev/null; then
    echo "Failed to enable git-lfs in $BASE_REPO_DIR." >&2
    record_failure "$item (git lfs install --local)"
    return 1
  fi

  if ! git -C "$BASE_REPO_DIR" lfs pull origin; then
    echo "Failed to pull Git LFS objects into $BASE_REPO_DIR." >&2
    record_failure "$item (git lfs pull)"
    return 1
  fi

  return 0
}
# Download one GGUF file into GGUF_DIR, resuming an earlier partial download
# when possible.
#
# The transfer goes to "<destination>.part" and is moved into place only after
# it succeeded and, when the remote size is known, its size matches.
#
# Leftover ".part" files are handled before curl is invoked:
#   - a partial whose size already equals the remote size is complete; asking
#     curl to resume it would request an empty byte range, which some curl
#     versions report as an error, so it is moved into place directly;
#   - a partial larger than the remote file cannot be a prefix of it and can
#     never be resumed, so it is discarded and the download starts over.
#
# main reaches this function through the condition of an `if`, where bash
# ignores `set -e`, so the final `mv` is checked explicitly.
#
# Globals:   GGUF_DIR, REFRESH_DOWNLOADS (read); the summary buckets are
#            updated through record_skip, record_download and record_failure
# Arguments: $1 - download URL
#            $2 - file name to store the download under
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 when the file is present (downloaded or skipped), 1 on failure
download_gguf() {
  local url=$1
  local filename=$2
  local destination="$GGUF_DIR/$filename"
  local partial="$destination.part"
  local item="assets/lab2/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/$filename"
  local expected_size
  local actual_size
  local partial_size
  local need_fetch=1

  # Best effort: an empty expected_size means "remote size unknown".
  expected_size=$(remote_content_length "$url" || true)

  if [ -f "$destination" ] && [ "$REFRESH_DOWNLOADS" -eq 0 ]; then
    actual_size=$(file_size "$destination") || actual_size=""
    if [ -n "$expected_size" ] && [ "$actual_size" = "$expected_size" ]; then
      echo "Skipping $filename; matching file already exists."
      record_skip "$item"
      return 0
    fi
    if [ -z "$expected_size" ] && [ "${actual_size:-0}" -gt 0 ]; then
      echo "Skipping $filename; existing file size is non-zero and no remote size was available."
      record_skip "$item"
      return 0
    fi
  fi

  if [ "$REFRESH_DOWNLOADS" -eq 1 ] && [ -f "$destination" ]; then
    rm -f "$destination"
  fi

  if [ -n "$expected_size" ] && [ -f "$partial" ]; then
    partial_size=$(file_size "$partial") || partial_size=""
    if [ "$partial_size" = "$expected_size" ]; then
      echo "Found a complete partial download for $filename; finishing it."
      need_fetch=0
    elif [ -n "$partial_size" ] && [ "$partial_size" -gt "$expected_size" ]; then
      echo "Discarding $partial: it is larger than the remote file ($partial_size > $expected_size bytes)." >&2
      rm -f "$partial"
    fi
  fi

  if [ "$need_fetch" -eq 1 ]; then
    echo "Downloading $filename"
    if ! curl -fL --progress-bar -C - -o "$partial" "$url"; then
      echo "Failed to download $filename. Partial data, if any, remains at $partial." >&2
      record_failure "$item"
      return 1
    fi
  fi

  if [ ! -f "$partial" ]; then
    echo "Download for $filename did not produce an output file." >&2
    record_failure "$item"
    return 1
  fi

  if [ -n "$expected_size" ]; then
    actual_size=$(file_size "$partial") || actual_size=""
    if [ "$actual_size" != "$expected_size" ]; then
      echo "Downloaded size mismatch for $filename: expected $expected_size bytes, got $actual_size bytes." >&2
      record_failure "$item"
      return 1
    fi
  fi

  if ! mv -f "$partial" "$destination"; then
    echo "Failed to move $partial into place at $destination." >&2
    record_failure "$item"
    return 1
  fi

  record_download "$item"
  return 0
}
# Download every file listed in GGUF_URLS, continuing past individual failures.
# The local file name is the text after the last "/" of the URL, cut at the
# first "?".
# Globals: GGUF_URLS (read)
# Returns: 0 when every download_gguf call succeeded, 1 when any of them failed
download_ggufs() {
  local gguf_url
  local gguf_name
  local any_failed=0

  for gguf_url in "${GGUF_URLS[@]}"; do
    gguf_name=${gguf_url##*/}
    gguf_name=${gguf_name%%\?*}
    download_gguf "$gguf_url" "$gguf_name" || any_failed=1
  done

  return "$any_failed"
}
# Script driver: parse options, verify the required tools, create the asset
# directories, fetch the base repository and the GGUF files, then print the
# summary.
# Arguments: the script's command-line arguments
# Exits:     1 (after the summary) when the repository step or any download
#            failed
main() {
  local exit_code=0

  parse_args "$@"
  require_cmd git
  require_cmd curl
  require_git_lfs
  ensure_parent_dirs

  # Both steps always run so the summary covers everything that was attempted.
  prepare_base_repo || exit_code=1
  download_ggufs || exit_code=1

  print_summary

  if [ "$exit_code" -eq 0 ]; then
    echo "Lab 2 repo-local assets are ready."
  else
    echo "Lab 2 asset bootstrap did not complete cleanly." >&2
    exit "$exit_code"
  fi
}
# Script entry point: run main with the command-line arguments passed through unchanged.
main "$@"