#!/usr/bin/env bash
#
# Populates the repo-local lab 2 assets: a git checkout of the base model
# repository and a fixed list of GGUF files downloaded over HTTPS.
# See usage() for the command-line interface.

set -euo pipefail

# Repository root: the parent of the directory that contains this script.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
LAB2_ASSETS_DIR="$ROOT_DIR/assets/lab2"

# Base model repository, the ref fetched on refresh, and its checkout path.
BASE_REPO_URL="https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B"
BASE_REPO_REF="main"
BASE_REPO_DIR="$LAB2_ASSETS_DIR/WhiteRabbitNeo-V3-7B"

# Directory that receives the GGUF downloads.
GGUF_DIR="$LAB2_ASSETS_DIR/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF"

# Set to 1 by parse_args when --refresh is given.
REFRESH_DOWNLOADS=0

# GGUF files to download; the local file name is derived from each URL.
GGUF_URLS=(
  "https://huggingface.co/bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/resolve/main/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-Q4_K_M.gguf?download=true"
  "https://huggingface.co/bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/resolve/main/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-Q8_0.gguf?download=true"
  "https://huggingface.co/bartowski/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/resolve/main/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-IQ2_M.gguf?download=true"
)

# Per-outcome item lists, appended to by the record_* helpers and printed by
# print_summary.
CLONED_ITEMS=()
REFRESHED_ITEMS=()
DOWNLOADED_ITEMS=()
SKIPPED_ITEMS=()
FAILED_ITEMS=()

#######################################
# Print the command-line help text.
# Outputs: writes the help text to stdout.
#######################################
usage() {
  # Quoted delimiter: the backticks below are printed literally.
  cat <<'EOF'
Usage: ./labctl assets lab2 [--refresh]

Populate repo-local lab 2 assets from Hugging Face without touching `up` or `preflight`.

Actions:
- Clone or refresh WhiteRabbitNeo-V3-7B into assets/lab2/WhiteRabbitNeo-V3-7B
- Download the supported GGUF files into assets/lab2/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF

Options:
--refresh Re-download GGUF files even if matching files already exist
-h, --help Show this help text
EOF
}

#######################################
# Append an item to the list of failed items shown in the summary.
# Globals:   FAILED_ITEMS (written)
# Arguments: $1 - label of the item that failed
#######################################
record_failure() {
  FAILED_ITEMS+=("$1")
}

#######################################
# Append an item to the list of skipped items shown in the summary.
# Globals:   SKIPPED_ITEMS (written)
# Arguments: $1 - label of the item that was skipped
#######################################
record_skip() {
  SKIPPED_ITEMS+=("$1")
}

#######################################
# Append an item to the list of cloned items shown in the summary.
# Globals:   CLONED_ITEMS (written)
# Arguments: $1 - label of the item that was cloned
#######################################
record_clone() {
  CLONED_ITEMS+=("$1")
}

#######################################
# Append an item to the list of refreshed items shown in the summary.
# Globals:   REFRESHED_ITEMS (written)
# Arguments: $1 - label of the item that was refreshed
#######################################
record_refresh() {
  REFRESHED_ITEMS+=("$1")
}

#######################################
# Append an item to the list of downloaded items shown in the summary.
# Globals:   DOWNLOADED_ITEMS (written)
# Arguments: $1 - label of the item that was downloaded
#######################################
record_download() {
  DOWNLOADED_ITEMS+=("$1")
}

#######################################
# Print one summary section; prints nothing when there are no items.
# Arguments: $1 - section title, $2... - items to list
# Outputs:   writes the section to stdout
#######################################
print_summary_section() {
  local title=$1
  shift

  if [ "$#" -eq 0 ]; then
    return 0
  fi

  echo " ${title}:"
  printf ' - %s\n' "$@"
}

#######################################
# Print every non-empty outcome list collected by the record_* helpers.
# Globals:   CLONED_ITEMS, REFRESHED_ITEMS, DOWNLOADED_ITEMS, SKIPPED_ITEMS,
#            FAILED_ITEMS (read)
# Outputs:   writes the summary to stdout
#######################################
print_summary() {
  echo
  echo "Lab 2 asset bootstrap summary:"

  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
  # tripping `set -u` on bash versions that treat an empty array as unset.
  print_summary_section "Cloned" ${CLONED_ITEMS[@]+"${CLONED_ITEMS[@]}"}
  print_summary_section "Refreshed" ${REFRESHED_ITEMS[@]+"${REFRESHED_ITEMS[@]}"}
  print_summary_section "Downloaded" ${DOWNLOADED_ITEMS[@]+"${DOWNLOADED_ITEMS[@]}"}
  print_summary_section "Skipped" ${SKIPPED_ITEMS[@]+"${SKIPPED_ITEMS[@]}"}
  print_summary_section "Failed" ${FAILED_ITEMS[@]+"${FAILED_ITEMS[@]}"}
}

#######################################
# Print a URL with one trailing slash removed, so that "…/repo" and
# "…/repo/" compare equal.
# Arguments: $1 - URL to normalize
# Outputs:   writes the normalized URL to stdout (no trailing newline)
#######################################
normalize_url() {
  printf '%s' "${1%/}"
}

#######################################
# Exit the script unless the named command can be found.
# Arguments: $1 - command name to look up
# Outputs:   writes an error to stderr when the command is missing
# Returns:   0 when found; otherwise exits the shell with status 1
#######################################
require_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Missing required command: $1" >&2
    exit 1
  fi
}

#######################################
# Exit the script unless `git lfs version` runs successfully.
# Outputs:   writes an error to stderr when git-lfs is unavailable
# Returns:   0 when available; otherwise exits the shell with status 1
#######################################
require_git_lfs() {
  if ! git lfs version >/dev/null 2>&1; then
    echo "Missing required command: git-lfs" >&2
    exit 1
  fi
}

#######################################
# Parse the command-line options.
# Globals:   REFRESH_DOWNLOADS (set to 1 by --refresh)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for -h/--help; an error plus the help text
#            on stderr for an unknown option
# Returns:   0 after consuming all arguments; exits 0 for -h/--help and
#            exits 1 for an unknown option
#######################################
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --refresh)
        REFRESH_DOWNLOADS=1
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
    shift
  done
}

#######################################
# Print the size of a file in bytes.
# Tries the `stat -c '%s'` form first and falls back to `stat -f '%z'` when
# that form fails, so either stat flavour is invoked at most once.
# Arguments: $1 - path to the file
# Outputs:   writes the byte count to stdout
# Returns:   status of the stat call that produced the result
#######################################
file_size() {
  stat -c '%s' "$1" 2>/dev/null || stat -f '%z' "$1"
}

#######################################
# Print the Content-Length reported for a URL by a HEAD request.
# Redirects are followed (-L), so several header blocks may come back; the
# last Content-Length seen is the one reported.
# Arguments: $1 - URL to probe
# Outputs:   writes the byte count to stdout; writes nothing when the header
#            is absent or is not a plain non-negative integer
# Returns:   curl's exit status when the request fails, 0 otherwise
#######################################
remote_content_length() {
  local headers
  local length

  headers=$(curl -fsSI -L "$1") || return

  # The separator accepts "Name:value" as well as "Name: value"; header names
  # are compared case-insensitively.
  length=$(
    printf '%s\n' "$headers" \
      | tr -d '\r' \
      | awk -F':[ \t]*' '
          tolower($1) == "content-length" { value = $2 }
          END { sub(/[ \t]+$/, "", value); print value }
        '
  )

  # Callers compare this against byte counts, so only emit digits.
  if [[ "$length" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$length"
  fi
}

#######################################
# Create the lab 2 asset directory and the GGUF download directory if they
# do not exist yet.
# Globals:   LAB2_ASSETS_DIR, GGUF_DIR (read)
# Returns:   status of mkdir
#######################################
ensure_parent_dirs() {
  mkdir -p -- "$LAB2_ASSETS_DIR" "$GGUF_DIR"
}

#######################################
# Check that BASE_REPO_DIR is either absent or a clean git checkout whose
# origin remote is BASE_REPO_URL.
# Globals:   BASE_REPO_DIR, BASE_REPO_URL (read)
# Outputs:   writes the reason for a refusal to stderr
# Returns:   0 when the path is absent or safe to refresh, 1 otherwise
#######################################
ensure_expected_repo_checkout() {
  local current_remote
  local normalized_current
  local normalized_expected
  local local_changes

  if [ -e "$BASE_REPO_DIR" ] && [ ! -d "$BASE_REPO_DIR/.git" ]; then
    echo "Refusing to reuse $BASE_REPO_DIR because it exists but is not a git checkout." >&2
    echo "Move it aside or remove it, then rerun ./labctl assets lab2." >&2
    return 1
  fi

  # Nothing on disk yet: there is nothing to validate.
  if [ ! -d "$BASE_REPO_DIR/.git" ]; then
    return 0
  fi

  # A missing origin remote leaves current_remote empty and is rejected below.
  current_remote=$(git -C "$BASE_REPO_DIR" remote get-url origin 2>/dev/null || true)
  normalized_current=$(normalize_url "$current_remote")
  normalized_expected=$(normalize_url "$BASE_REPO_URL")
  if [ -z "$current_remote" ] || [ "$normalized_current" != "$normalized_expected" ]; then
    echo "Refusing to reuse $BASE_REPO_DIR because its origin remote is unexpected." >&2
    echo "Expected: $BASE_REPO_URL" >&2
    echo "Found: ${current_remote:-<missing>}" >&2
    echo "Move it aside or remove it, then rerun ./labctl assets lab2." >&2
    return 1
  fi

  # A failing `git status` prints nothing; check its exit status so that an
  # unreadable checkout is not mistaken for a clean one.
  if ! local_changes=$(git -C "$BASE_REPO_DIR" status --porcelain --untracked-files=all); then
    echo "Refusing to refresh $BASE_REPO_DIR because its status could not be determined." >&2
    echo "Move it aside or remove it, then rerun ./labctl assets lab2." >&2
    return 1
  fi

  if [ -n "$local_changes" ]; then
    echo "Refusing to refresh $BASE_REPO_DIR because it has local changes." >&2
    echo "Commit, stash, or remove that checkout first, then rerun ./labctl assets lab2." >&2
    return 1
  fi

  return 0
}

#######################################
# Clone the base model repository, or refresh an existing checkout to the
# configured ref, then pull its Git LFS objects.
#
# Every git step is checked explicitly: main calls this function as
# `if ! prepare_base_repo`, and bash ignores `set -e` inside a function run
# in that context, so an unchecked failure would otherwise fall through to
# the next command and be recorded as a success.
#
# Globals:   BASE_REPO_DIR, BASE_REPO_URL, BASE_REPO_REF (read)
# Outputs:   progress messages on stdout; git writes its own diagnostics
# Returns:   0 on success; 1 on failure, after recording the failed step
#            with record_failure
#######################################
prepare_base_repo() {
  local label="assets/lab2/WhiteRabbitNeo-V3-7B"

  if ! ensure_expected_repo_checkout; then
    record_failure "WhiteRabbitNeo-V3-7B checkout"
    return 1
  fi

  if ! git lfs install --skip-repo >/dev/null; then
    record_failure "$label (git lfs install)"
    return 1
  fi

  if [ ! -d "$BASE_REPO_DIR/.git" ]; then
    echo "Cloning WhiteRabbitNeo-V3-7B into $BASE_REPO_DIR"
    # LFS smudging is skipped during the clone; the objects are fetched by
    # the explicit `git lfs pull` at the end of this function.
    if ! GIT_LFS_SKIP_SMUDGE=1 git clone --depth=1 "$BASE_REPO_URL" "$BASE_REPO_DIR"; then
      record_failure "$label (git clone)"
      return 1
    fi
    record_clone "$label"
  else
    echo "Refreshing WhiteRabbitNeo-V3-7B in $BASE_REPO_DIR"
    if ! git -C "$BASE_REPO_DIR" fetch --depth=1 origin "$BASE_REPO_REF"; then
      record_failure "$label (git fetch)"
      return 1
    fi
    if ! git -C "$BASE_REPO_DIR" checkout -f --detach FETCH_HEAD; then
      record_failure "$label (git checkout)"
      return 1
    fi
    record_refresh "$label"
  fi

  if ! git -C "$BASE_REPO_DIR" lfs install --local >/dev/null; then
    record_failure "$label (git lfs install)"
    return 1
  fi

  if ! git -C "$BASE_REPO_DIR" lfs pull origin; then
    record_failure "$label (git lfs pull)"
    return 1
  fi

  return 0
}

#######################################
# Download one GGUF file into GGUF_DIR via a resumable ".part" file.
#
# An existing destination file is only replaced after the new download has
# completed and passed the size check, so a failed --refresh never destroys
# a previously good file.
#
# Globals:   GGUF_DIR, REFRESH_DOWNLOADS (read)
# Arguments: $1 - URL to download
#            $2 - file name to store it under inside GGUF_DIR
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 when the file was downloaded or skipped, 1 on failure; the
#            outcome is recorded with record_download / record_skip /
#            record_failure
#######################################
download_gguf() {
  local url=$1
  local filename=$2
  local destination="$GGUF_DIR/$filename"
  local partial="$destination.part"
  local label="assets/lab2/WhiteRabbitNeo_WhiteRabbitNeo-V3-7B-GGUF/$filename"
  local expected_size
  local actual_size
  local need_download=1

  expected_size=$(remote_content_length "$url" || true)
  # Anything that is not a plain byte count is treated as "size unknown".
  if [[ ! "$expected_size" =~ ^[0-9]+$ ]]; then
    expected_size=""
  fi

  if [ -f "$destination" ] && [ "$REFRESH_DOWNLOADS" -eq 0 ]; then
    actual_size=$(file_size "$destination" || true)

    if [ -n "$expected_size" ] && [ "$actual_size" = "$expected_size" ]; then
      echo "Skipping $filename; matching file already exists."
      record_skip "$label"
      return 0
    fi

    if [ -z "$expected_size" ] && [ "${actual_size:-0}" -gt 0 ]; then
      echo "Skipping $filename; existing file size is non-zero and no remote size was available."
      record_skip "$label"
      return 0
    fi
  fi

  # Inspect a partial file left by an earlier run. `curl -C -` asks the
  # server for the bytes after the current end of the file, which cannot be
  # satisfied when the partial is already complete or longer than the remote.
  if [ -n "$expected_size" ] && [ -f "$partial" ]; then
    actual_size=$(file_size "$partial" || true)
    if [ "$actual_size" = "$expected_size" ]; then
      echo "Using already complete partial download for $filename"
      need_download=0
    elif [ "${actual_size:-0}" -gt "$expected_size" ]; then
      # Longer than the remote file, so it cannot be a prefix of it.
      rm -f "$partial"
    fi
  fi

  if [ "$need_download" -eq 1 ]; then
    echo "Downloading $filename"
    if ! curl -fL --progress-bar -C - -o "$partial" "$url"; then
      echo "Failed to download $filename. Partial data, if any, remains at $partial." >&2
      record_failure "$label"
      return 1
    fi
  fi

  if [ ! -f "$partial" ]; then
    echo "Download for $filename did not produce an output file." >&2
    record_failure "$label"
    return 1
  fi

  if [ -n "$expected_size" ]; then
    actual_size=$(file_size "$partial" || true)
    if [ "$actual_size" != "$expected_size" ]; then
      echo "Downloaded size mismatch for $filename: expected $expected_size bytes, got $actual_size bytes." >&2
      record_failure "$label"
      return 1
    fi
  fi

  # mv -f replaces any existing destination, including during --refresh.
  if ! mv -f "$partial" "$destination"; then
    echo "Failed to move $partial to $destination." >&2
    record_failure "$label"
    return 1
  fi

  record_download "$label"
  return 0
}

#######################################
# Download every URL in GGUF_URLS, continuing past individual failures.
# Globals:   GGUF_URLS (read)
# Returns:   0 when every download succeeded or was skipped, 1 when at
#            least one failed
#######################################
download_ggufs() {
  local url
  local filename
  local failures=0

  for url in "${GGUF_URLS[@]}"; do
    # File name = last path segment of the URL without its query string.
    filename=${url##*/}
    filename=${filename%%\?*}

    if ! download_gguf "$url" "$filename"; then
      failures=1
    fi
  done

  return "$failures"
}

#######################################
# Entry point: parse options, check prerequisites, prepare the base
# repository, download the GGUF files and print a summary.
# The GGUF downloads are attempted even when the repository step fails.
# Arguments: the script's command-line arguments
# Outputs:   summary on stdout; a failure notice on stderr
# Returns:   0 on success; exits with status 1 when any step failed
#######################################
main() {
  local status=0

  parse_args "$@"
  require_cmd git
  require_cmd curl
  require_git_lfs
  ensure_parent_dirs

  if ! prepare_base_repo; then
    status=1
  fi

  if ! download_ggufs; then
    status=1
  fi

  print_summary

  if [ "$status" -ne 0 ]; then
    echo "Lab 2 asset bootstrap did not complete cleanly." >&2
    exit "$status"
  fi

  echo "Lab 2 repo-local assets are ready."
}

# Run the bootstrap with the script's command-line arguments.
main "$@"