Files
LLM-Labs-Local/scripts/service_manager.sh
T
2026-04-16 11:16:01 -06:00

399 lines
8.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Service manager for the lab stack: dispatches the start / stop / status /
# urls / open / logs subcommands handled by main() at the bottom of this file.
#
# Helpers used throughout this file but not defined in it (load_runtime_env,
# ensure_runtime_env, service_url, service_port, service_command,
# service_pid_file, service_log_file, service_list) and the configuration
# variables (STATE_DIR, OLLAMA_BIN, WETTY_BIN, ...) are presumably provided by
# common.sh / load_runtime_env -- confirm there.

# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail
# Directory containing this script, so common.sh is found regardless of the caller's cwd.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck disable=SC1091
. "$SCRIPT_DIR/common.sh"
load_runtime_env
# Make sure the run/ and logs/ directories under STATE_DIR exist before any service is handled.
mkdir -p "$STATE_DIR/run" "$STATE_DIR/logs"
# Verify everything WeTTY needs before it is launched.
# Globals:
#   WETTY_BIN                - must point at an executable file
#   WIKI_RUNTIME_CONFIG_PATH - must point at an existing regular file
# Outputs:
#   A diagnostic on stderr naming the first prerequisite that is missing.
# Returns:
#   0 when every check passes; otherwise the script exits with status 1.
check_wetty_prereqs() {
  if [ ! -x "$WETTY_BIN" ]; then
    echo "Missing WeTTY binary at $WETTY_BIN. Re-run ./labctl up." >&2
    exit 1
  fi
  if [ ! -f "$WIKI_RUNTIME_CONFIG_PATH" ]; then
    echo "Missing wiki runtime config at $WIKI_RUNTIME_CONFIG_PATH. Re-run ./labctl up." >&2
    exit 1
  fi
  # Without python3 the probe below cannot run; say so instead of blaming sshd.
  if ! command -v python3 >/dev/null 2>&1; then
    echo "python3 is required to probe the loopback sshd on 127.0.0.1:22." >&2
    exit 1
  fi
  # TCP connect probe with a 1 second timeout. The here-document is quoted
  # ('PY'), so its body is passed to Python verbatim; it must therefore start
  # at column 0 and keep valid Python indentation inside try/except/finally.
  if ! python3 - <<'PY'
import socket, sys
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(1)
try:
    sock.connect(("127.0.0.1", 22))
except OSError:
    sys.exit(1)
finally:
    sock.close()
PY
  then
    echo "Loopback sshd is not reachable on 127.0.0.1:22." >&2
    exit 1
  fi
}
# Succeed when the installed Ollama reports a version that is at least
# COURSEWARE_OLLAMA_MIN_VERSION.
# Globals:
#   OLLAMA_BIN                    - command invoked with --version
#   COURSEWARE_OLLAMA_MIN_VERSION - lowest acceptable version
# Returns:
#   0 if the first x.y.z triple in the --version output sorts at or above the
#   minimum (version sort); 1 if the command is missing, prints no x.y.z
#   triple, or is older than the minimum.
ollama_version_gte_minimum() {
  local raw
  local installed
  local lowest
  local triple_re='[0-9]+\.[0-9]+\.[0-9]+'

  command -v "$OLLAMA_BIN" >/dev/null 2>&1 || return 1

  # A failing --version call is tolerated; it simply yields no version below.
  raw=$("$OLLAMA_BIN" --version 2>/dev/null || true)

  # Leftmost x.y.z triple anywhere in the output.
  [[ "$raw" =~ $triple_re ]] || return 1
  installed=${BASH_REMATCH[0]}

  # The minimum is satisfied exactly when it sorts first (or ties).
  lowest=$(printf '%s\n' "$COURSEWARE_OLLAMA_MIN_VERSION" "$installed" | sort -V | head -n 1)
  [ "$lowest" = "$COURSEWARE_OLLAMA_MIN_VERSION" ]
}
# Abort the script unless the installed Ollama meets the minimum version.
# Globals:
#   OLLAMA_BIN, COURSEWARE_OLLAMA_MIN_VERSION (read)
# Outputs:
#   On failure, a three-line explanation on stderr that includes whatever
#   "$OLLAMA_BIN --version" printed ("unknown" if that command fails).
# Returns:
#   0 when ollama_version_gte_minimum succeeds; otherwise exits with status 1.
assert_ollama_logprobs_support() {
  local reported
  if ! ollama_version_gte_minimum; then
    reported=$("$OLLAMA_BIN" --version 2>/dev/null || printf 'unknown')
    {
      printf '%s\n' "Lab 1 requires Ollama ${COURSEWARE_OLLAMA_MIN_VERSION} or newer because the confidence visualizer depends on logprobs."
      printf '%s\n' "Installed version: ${reported}"
      printf '%s\n' "Re-run ./labctl up after upgrading Ollama."
    } >&2
    exit 1
  fi
  return 0
}
# Expand the target arguments of a subcommand into service names, one per line.
# Arguments:
#   $@ - one or more targets. "core" expands to ollama and open-webui, "all"
#        expands to the output of service_list, anything else is passed
#        through unchanged.
# Outputs:
#   The resulting service names on stdout, in argument order.
# Returns:
#   0 on success; exits 1 (with a message on stderr) when no target is given.
# Every argument is expanded, so mixed invocations such as "core wiki" keep
# the trailing targets instead of silently dropping them.
resolve_targets() {
  local target
  if [ $# -eq 0 ]; then
    echo "No target specified." >&2
    exit 1
  fi
  for target in "$@"; do
    case "$target" in
      core)
        printf '%s\n' "ollama" "open-webui"
        ;;
      all)
        service_list
        ;;
      *)
        printf '%s\n' "$target"
        ;;
    esac
  done
}
# Report whether the PID recorded for a service refers to a signalable process.
# Arguments:
#   $1 - service name, passed to service_pid_file to locate the PID file
# Returns:
#   0 if the PID file exists and "kill -0" succeeds for its content; 1 otherwise.
has_live_pid() {
  local service=$1
  local pid_file
  local recorded_pid

  pid_file=$(service_pid_file "$service")
  [ -f "$pid_file" ] || return 1

  recorded_pid=$(cat "$pid_file")
  # Signal 0 only checks that the process can be signalled; nothing is delivered.
  kill -0 "$recorded_pid" >/dev/null 2>&1 || return 1
  return 0
}
# Succeed when a service either has a live recorded PID or answers its
# readiness probe.
# Arguments:
#   $1 - service name
# Returns:
#   0 if has_live_pid succeeds; otherwise the status of service_ready.
is_running() {
  local service=$1
  if has_live_pid "$service"; then
    return 0
  fi
  service_ready "$service"
}
# Probe a service over HTTP and report whether it answers successfully.
# Arguments:
#   $1 - service name; its base URL comes from service_url
# Returns:
#   0 if curl gets a non-error HTTP response (-f) from the service's probe
#   endpoint; non-zero if the request fails or times out; 1 for a service
#   name this function does not know.
# The probe is time-bounded. Without --connect-timeout/--max-time a port that
# accepts connections but never answers would block curl indefinitely, and
# with it every start/stop/status loop that polls this function.
service_ready() {
  local service=$1
  local endpoint
  local -r connect_timeout=2
  local -r total_timeout=5

  case "$service" in
    ollama)
      endpoint=/api/tags
      ;;
    promptfoo)
      endpoint=/health
      ;;
    open-webui | netron | chunkviz | embedding-atlas | unsloth | wiki | wetty)
      # These are probed at their base URL.
      endpoint=
      ;;
    *)
      return 1
      ;;
  esac

  curl -fsS --connect-timeout "$connect_timeout" --max-time "$total_timeout" \
    "$(service_url "$service")${endpoint}" >/dev/null 2>&1
}
# List the PIDs of processes listening on a service's TCP port.
# Arguments:
#   $1 - service name; its port comes from service_port
# Outputs:
#   Unique PIDs, one per line, extracted from the "pid=N" fields that
#   "ss -ltnp" prints for sockets whose source port matches.
# Returns:
#   Always 0. "No listener", "no port for this service" and "ss unavailable or
#   failing" all yield empty output rather than a failure, so the function is
#   safe to call under "set -euo pipefail".
service_listener_pids() {
  local service=$1
  local port
  port=$(service_port "$service") || return 0
  # An empty port would produce a malformed ss filter; treat it as no listeners.
  if [ -z "$port" ]; then
    return 0
  fi
  # ss and grep are allowed to fail (missing tool / no match) without failing
  # the pipeline; -E keeps the pattern portable across grep implementations.
  { ss -ltnp "( sport = :$port )" 2>/dev/null || true; } \
    | { grep -oE 'pid=[0-9]+' || true; } \
    | cut -d= -f2 \
    | sort -u
}
# Send a signal to a process, its process group and its direct children.
# Arguments:
#   $1 - signal name or number without the leading dash (e.g. TERM, KILL)
#   $2 - target PID
# Returns:
#   Always 0; delivery failures are deliberately ignored (best effort).
# PIDs that are not plain decimal numbers are ignored, and so are PIDs 0 and 1:
#   - "kill -- -1" addresses every process the caller may signal,
#   - "kill -- -0" and "kill 0" address the caller's own process group,
#   - "pkill -P 1" would hit every process re-parented to init.
# None of those can be the intended target of a single-service shutdown.
kill_pid_tree() {
  local signal=$1
  local pid=$2
  if [[ ! "$pid" =~ ^[0-9]+$ ]]; then
    return 0
  fi
  # 10# forces base 10 so a value with leading zeros is not parsed as octal.
  if ((10#$pid <= 1)); then
    return 0
  fi
  # Process group led by $pid (exists when the service was started via setsid).
  kill "-$signal" -- "-$pid" >/dev/null 2>&1 || true
  # Direct children of $pid.
  pkill "-$signal" -P "$pid" >/dev/null 2>&1 || true
  # The process itself.
  kill "-$signal" "$pid" >/dev/null 2>&1 || true
}
# Signal every process associated with a service.
# Arguments:
#   $1 - service name
#   $2 - signal to send (forwarded to kill_pid_tree)
#   $3 - optional PID to signal first; skipped when empty or omitted
# The explicit PID is handled before the port listeners are enumerated, then
# each PID reported by service_listener_pids receives the same signal.
terminate_service_processes() {
  local service=$1
  local signal=$2
  local pid=${3:-}
  local victim

  [ -z "$pid" ] || kill_pid_tree "$signal" "$pid"

  while IFS= read -r victim; do
    kill_pid_tree "$signal" "$victim"
  done < <(service_listener_pids "$service")
}
# Start a single service in the background and wait until it is ready.
# Arguments:
#   $1 - service name
# Globals:
#   OLLAMA_BIN, OLLAMA_MODELS_DIR, COURSEWARE_BIND_HOST, COURSEWARE_OLLAMA_PORT
#   (read, ollama only)
# Outputs:
#   A one-line status on stdout; failures are reported on stderr.
# Returns:
#   0 once the service is running, already running or already available.
#   Exits 1 if it dies during startup or is not ready after 60 probes.
start_one() {
  local service=$1
  local cmd
  local log_file
  local pid_file
  local attempt
  local pid_grace_attempts=5
  local already=""

  # ollama and wiki both require the minimum Ollama version.
  case "$service" in
    ollama | wiki)
      assert_ollama_logprobs_support
      ;;
  esac

  # Nothing to do when the service is alive or already answering.
  if has_live_pid "$service"; then
    already="already running"
  elif service_ready "$service"; then
    already="already available"
  fi
  if [ -n "$already" ]; then
    echo "$service $already"
    return 0
  fi

  # Per-service prerequisites: open-webui needs ollama up first, wetty needs
  # its binary, config and loopback sshd.
  if [ "$service" = "open-webui" ]; then
    start_one ollama
  elif [ "$service" = "wetty" ]; then
    check_wetty_prereqs
  fi

  cmd=$(service_command "$service")
  log_file=$(service_log_file "$service")
  pid_file=$(service_pid_file "$service")

  # ollama is executed directly with its environment; every other service runs
  # its command line in a login shell, detached into its own session when
  # setsid is available.
  if [ "$service" = "ollama" ]; then
    env \
      OLLAMA_HOST="${COURSEWARE_BIND_HOST}:${COURSEWARE_OLLAMA_PORT}" \
      OLLAMA_MODELS="$OLLAMA_MODELS_DIR" \
      "$OLLAMA_BIN" serve </dev/null >>"$log_file" 2>&1 &
  elif command -v setsid >/dev/null 2>&1; then
    nohup setsid bash -lc "$cmd" </dev/null >>"$log_file" 2>&1 &
  else
    nohup bash -lc "$cmd" </dev/null >>"$log_file" 2>&1 &
  fi
  printf '%s\n' "$!" >"$pid_file"

  # Poll once per second. A vanished PID is tolerated for the first few probes
  # (the service may still become reachable); from probe $pid_grace_attempts
  # on it is treated as a failed start.
  for ((attempt = 1; attempt <= 60; attempt++)); do
    if service_ready "$service"; then
      echo "started $service"
      return 0
    fi
    if ! has_live_pid "$service" && [ "$attempt" -ge "$pid_grace_attempts" ]; then
      rm -f "$pid_file"
      echo "failed to start $service; check $log_file" >&2
      exit 1
    fi
    sleep 1
  done

  echo "$service did not become ready in time; check $log_file" >&2
  exit 1
}
# Stop a single service, escalating from TERM to KILL.
# Arguments:
#   $1 - service name
# Outputs:
#   "<service> not running" or "stopped <service>" on stdout; a failure
#   message on stderr.
# Returns:
#   0 when the service was not running or has been stopped. Exits 1 when it
#   is still alive or reachable after both phases; the PID file is removed in
#   every case that gets past the "not running" check.
stop_one() {
  local service=$1
  local pid_file
  local pid=""
  local attempt
  local phase
  local signal
  local budget

  pid_file=$(service_pid_file "$service")
  [ ! -f "$pid_file" ] || pid=$(cat "$pid_file")

  if [ -z "$pid" ] && ! service_ready "$service"; then
    echo "$service not running"
    return 0
  fi

  # Phase = signal:number of one-second checks granted after sending it.
  for phase in TERM:20 KILL:5; do
    signal=${phase%%:*}
    budget=${phase##*:}
    terminate_service_processes "$service" "$signal" "$pid"
    for ((attempt = 1; attempt <= budget; attempt++)); do
      # Still alive or still answering: wait a second and check again.
      if has_live_pid "$service" || service_ready "$service"; then
        sleep 1
        continue
      fi
      rm -f "$pid_file"
      echo "stopped $service"
      return 0
    done
  done

  rm -f "$pid_file"
  echo "failed to stop $service cleanly" >&2
  exit 1
}
# Print a one-line status for a service.
# Arguments:
#   $1 - service name
# Outputs:
#   "<STATE> <service padded to 15 columns> <url>" on stdout, where STATE is
#   RUNNING when the readiness probe succeeds, STARTING when only the recorded
#   PID is alive, and STOPPED otherwise.
status_one() {
  local service=$1
  local state=STOPPED

  if service_ready "$service"; then
    state=RUNNING
  elif has_live_pid "$service"; then
    state=STARTING
  fi

  printf '%s %-15s %s\n' "$state" "$service" "$(service_url "$service")"
}
# Print the address of every service plus the Promptfoo CLI and Kiln paths.
# Globals:
#   PROMPTFOO_BIN    - printed as the Promptfoo CLI location
#   KILN_LAUNCH_PATH - printed as the Kiln location; "not installed" when
#                      unset or empty
# Outputs:
#   One "Label: value" line per entry on stdout.
urls() {
  printf '%s\n' \
    "Ollama API: $(service_url ollama)" \
    "Open WebUI: $(service_url open-webui)" \
    "Netron: $(service_url netron)" \
    "ChunkViz: $(service_url chunkviz)" \
    "Embedding Atlas: $(service_url embedding-atlas)" \
    "Unsloth Studio: $(service_url unsloth)" \
    "Promptfoo CLI: $PROMPTFOO_BIN" \
    "Promptfoo UI: $(service_url promptfoo)" \
    "Wiki: $(service_url wiki)" \
    "Lab 3 Terminal: $(service_url wetty)" \
    "Kiln app: ${KILN_LAUNCH_PATH:-not installed}"
}
# Launch the Kiln desktop application.
# Globals:
#   KILN_MAC_APP   - application bundle directory, used when uname reports Darwin
#   KILN_LINUX_BIN - executable started in the background otherwise (or when
#                    the bundle directory does not exist)
# Outputs:
#   A confirmation on stdout for the binary launch; an error on stderr when
#   neither location is usable.
# Returns:
#   0 after launching; exits 1 when Kiln is not installed.
open_kiln() {
  local platform
  platform=$(uname -s)

  case "$platform" in
    Darwin)
      if [ -d "$KILN_MAC_APP" ]; then
        open "$KILN_MAC_APP"
        return 0
      fi
      ;;
  esac

  if [ ! -x "$KILN_LINUX_BIN" ]; then
    echo "Kiln is not installed." >&2
    exit 1
  fi

  # Detached from the terminal's hangup signal; its output is discarded.
  nohup "$KILN_LINUX_BIN" >/dev/null 2>&1 &
  echo "started Kiln from $KILN_LINUX_BIN"
  return 0
}
# Print the tail of a service's log file.
# Arguments:
#   $1 - service name; the log path comes from service_log_file
# Outputs:
#   The last 80 lines of the log on stdout; an error on stderr if the log
#   file does not exist.
# Returns:
#   The status of tail; exits 1 when there is no log file.
show_logs() {
  local service=$1
  local log_file
  log_file=$(service_log_file "$service")

  if [ -f "$log_file" ]; then
    tail -n 80 "$log_file"
    return
  fi

  echo "No log file for $service" >&2
  exit 1
}
# Run a per-service handler for every service the given targets resolve to.
# Arguments:
#   $1   - name of the function to call with each service name
#   $2.. - targets, forwarded to resolve_targets
# Returns:
#   0 after all handlers ran; exits with resolve_targets' status if it fails.
# Targets are resolved through a command substitution so that a failure in
# resolve_targets (e.g. no target given) is seen here; inside a process
# substitution its exit status would be discarded. The names are collected
# into an array first so the handlers run with the caller's stdin instead of
# the list of remaining targets. Blank lines are skipped.
_for_each_target() {
  local handler=$1
  shift
  local listing
  local target
  local -a targets=()

  listing=$(resolve_targets "$@") || exit "$?"
  while IFS= read -r target; do
    if [ -n "$target" ]; then
      targets+=("$target")
    fi
  done <<<"$listing"

  # Guard the expansion: under "set -u" older bash versions reject "${arr[@]}"
  # on an empty array.
  if [ "${#targets[@]}" -eq 0 ]; then
    return 0
  fi
  for target in "${targets[@]}"; do
    "$handler" "$target"
  done
}

# Command-line entry point.
# Arguments:
#   $1   - subcommand: start | stop | status | urls | open | logs
#   $2.. - subcommand arguments (targets for start/stop/status, "kiln" for
#          open, exactly one service name for logs)
# Returns:
#   0 on success; exits 1 on an unknown subcommand, invalid arguments, or
#   when a target list cannot be resolved.
main() {
  local cmd=${1:-}
  # shift fails when no argument was given; that case is handled by the
  # default branch below.
  shift || true
  ensure_runtime_env
  case "$cmd" in
    start)
      _for_each_target start_one "$@"
      ;;
    stop)
      _for_each_target stop_one "$@"
      ;;
    status)
      # With no explicit target, report on every service.
      if [ $# -eq 0 ]; then
        set -- all
      fi
      _for_each_target status_one "$@"
      ;;
    urls)
      urls
      ;;
    open)
      if [ "${1:-}" != "kiln" ]; then
        echo "Only 'open kiln' is supported." >&2
        exit 1
      fi
      open_kiln
      ;;
    logs)
      if [ $# -ne 1 ]; then
        echo "Usage: ./labctl logs <service>" >&2
        exit 1
      fi
      show_logs "$1"
      ;;
    *)
      echo "Unknown command: $cmd" >&2
      exit 1
      ;;
  esac
}
# Script entry point: hand every command-line argument to main unchanged.
main "$@"