Files
LLM-Labs-Local/ansible/roles/preflight/tasks/main.yml
T
2026-04-24 21:32:01 -06:00

293 lines
9.8 KiB
YAML

# Derive the host-classification facts that gate the rest of these tasks.
# The Linux and WSL-kernel tests are declared once as task-scoped vars so
# every fact below reuses the same expression.
- name: Classify supported host profile
  vars:
    _preflight_on_linux: "{{ ansible_system == 'Linux' }}"
    _preflight_wsl_kernel: "{{ 'microsoft' in ansible_kernel | lower or 'wsl' in ansible_kernel | lower }}"
  set_fact:
    courseware_is_linux: "{{ _preflight_on_linux }}"
    courseware_is_wsl: "{{ _preflight_wsl_kernel }}"
    courseware_is_native_linux: "{{ _preflight_on_linux and not _preflight_wsl_kernel }}"
    # One of: 'wsl', 'native-debian-ubuntu', 'unsupported'.
    courseware_host_profile: >-
      {{
        'wsl'
        if (_preflight_on_linux and _preflight_wsl_kernel)
        else (
          'native-debian-ubuntu'
          if (_preflight_on_linux and ansible_os_family == 'Debian')
          else 'unsupported'
        )
      }}
# Stop immediately when the host was classified as 'unsupported'.
- name: Fail on unsupported operating systems
  when: courseware_host_profile == "unsupported"
  fail:
    msg: "Supported platforms are Debian-family Linux and WSL."
# Reject Linux hosts (including WSL distros) outside the Debian OS family.
- name: Fail on unsupported Linux family
  when: courseware_is_linux and ansible_os_family != "Debian"
  fail:
    msg: "This installer currently supports Debian and Ubuntu only."
# Best-effort probe of total GPU memory via nvidia-smi (CSV, no header, no
# units). The task never fails or reports a change; the registered result's
# rc and stdout_lines are inspected by the fail tasks that follow.
- name: Query NVIDIA GPU memory
  when: courseware_is_linux
  command:
    cmd: nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits
  register: courseware_gpu_memory
  failed_when: false
  changed_when: false
# Best-effort probe of GPU model names via nvidia-smi. The result is only
# registered here; it never fails the play or reports a change.
- name: Query NVIDIA GPU names
  when: courseware_is_linux
  command:
    cmd: nvidia-smi --query-gpu=name --format=csv,noheader
  register: courseware_gpu_names
  failed_when: false
  changed_when: false
# A non-zero rc from the GPU memory query means nvidia-smi could not report
# a GPU, which is treated as a hard stop on Linux/WSL.
- name: Fail when no supported NVIDIA GPU is visible
  when: courseware_is_linux and courseware_gpu_memory.rc != 0
  fail:
    msg: "Linux/WSL requires an NVIDIA GPU visible to nvidia-smi."
# Enforce the VRAM baseline against the largest value reported by the GPU
# memory query (one line per GPU; the threshold compared is 8192).
#
# The list is padded with 0 before taking `max`: with empty or missing
# stdout_lines, `max` alone yields an undefined value and the task would
# abort with a templating error instead of this task's own message.
# Non-numeric lines become 0 through the `int` filter.
- name: Fail when GPU VRAM is below baseline
  fail:
    msg: "This build assumes at least 8 GB of VRAM on Linux/WSL."
  when:
    - courseware_is_linux
    - (((courseware_gpu_memory.stdout_lines | default([]) | map('int') | list) + [0]) | max) < 8192
# Look for nvcc on PATH. Read-only and best-effort: the rc is consumed by the
# toolkit-readiness fact rather than failing the play here.
- name: Check for CUDA compiler on Linux
  when: courseware_is_linux
  command:
    cmd: which nvcc
  register: courseware_preflight_nvcc
  failed_when: false
  changed_when: false
# Fallback probe for an nvcc that is installed but not on PATH: prints the
# first executable candidate under /usr/local/cuda*/bin and exits 0, or
# exits 1 when none is found. Never fails the play or reports a change.
- name: Check for CUDA compiler in standard install locations
  when: courseware_is_linux
  shell:
    executable: /bin/bash
    cmd: |
      for candidate in /usr/local/cuda/bin/nvcc /usr/local/cuda-*/bin/nvcc; do
        if [ -x "$candidate" ]; then
          printf '%s\n' "$candidate"
          exit 0
        fi
      done
      exit 1
  register: courseware_preflight_nvcc_fallback
  failed_when: false
  changed_when: false
# Stat each known location of cuda_runtime.h; the per-item results are
# registered for the toolkit-readiness fact.
- name: Check for CUDA runtime header on Linux
  when: courseware_is_linux
  stat:
    path: "{{ item }}"
  loop:
    - /usr/local/cuda/include/cuda_runtime.h
    - /usr/include/cuda_runtime.h
  register: courseware_preflight_cuda_headers
# The toolkit counts as ready when any one probe succeeded: nvcc on PATH,
# nvcc in a standard install location, or a cuda_runtime.h header on disk.
- name: Set CUDA toolkit readiness
  when: courseware_is_linux
  vars:
    _preflight_cuda_headers_found: >-
      {{
        courseware_preflight_cuda_headers.results
        | selectattr('stat.exists', 'equalto', true)
        | list
      }}
  set_fact:
    courseware_cuda_toolkit_ready: >-
      {{
        courseware_preflight_nvcc.rc == 0
        or courseware_preflight_nvcc_fallback.rc == 0
        or (_preflight_cuda_headers_found | length > 0)
      }}
# Ask apt which version (if any) of nvidia-cuda-toolkit it would install.
# Read-only and best-effort; the output is parsed by the availability fact.
- name: Query distro CUDA toolkit apt candidate
  when: courseware_is_linux and ansible_os_family == "Debian"
  command:
    cmd: apt-cache policy nvidia-cuda-toolkit
  register: courseware_preflight_cuda_toolkit_policy
  failed_when: false
  changed_when: false
# Decide whether the distro archive can actually supply nvidia-cuda-toolkit.
#
# Both checks on stdout are needed:
#   * "Candidate: (none)" means apt knows the package but has no installable
#     version.
#   * A missing "Candidate:" line means apt does not know the package at all.
#     `apt-cache policy` still exits 0 in that case, so without this check the
#     package would be reported as available and the later apt install would
#     fail instead of being skipped.
- name: Set distro CUDA toolkit package availability
  set_fact:
    courseware_preflight_cuda_toolkit_package_available: >-
      {{
        courseware_preflight_cuda_toolkit_policy.rc == 0
        and 'Candidate:' in courseware_preflight_cuda_toolkit_policy.stdout
        and 'Candidate: (none)' not in courseware_preflight_cuda_toolkit_policy.stdout
      }}
  when:
    - courseware_is_linux
    - ansible_os_family == "Debian"
# The automated CUDA bootstrap below only targets Ubuntu on x86_64/amd64.
# Any other WSL host that still lacks the toolkit is stopped here with
# manual-install guidance.
- name: Fail when automatic WSL CUDA bootstrap is unsupported
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - not (ansible_distribution == "Ubuntu" and ansible_architecture in ["x86_64", "amd64"])
  fail:
    msg: "Automatic CUDA bootstrap currently supports Ubuntu x86_64 on WSL only. For other WSL distros, install the CUDA toolkit manually before rerunning."
# First bootstrap attempt: install the distro's own nvidia-cuda-toolkit
# package, but only when the availability fact says apt can supply it.
- name: Install distro CUDA toolkit on Ubuntu WSL when available
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
    - courseware_preflight_cuda_toolkit_package_available | default(false)
  become: true
  apt:
    update_cache: true
    name: nvidia-cuda-toolkit
    state: present
# Re-probe PATH for nvcc after the distro package install. Guarded by the
# same conditions as that install, so it only runs when the install was
# attempted.
- name: Recheck CUDA compiler after distro toolkit install
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
    - courseware_preflight_cuda_toolkit_package_available | default(false)
  command:
    cmd: which nvcc
  register: courseware_preflight_nvcc_after_distro_install
  failed_when: false
  changed_when: false
# Re-stat the known cuda_runtime.h locations after the distro package
# install; results are consumed by the readiness refresh.
- name: Recheck CUDA runtime header after distro toolkit install
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
    - courseware_preflight_cuda_toolkit_package_available | default(false)
  stat:
    path: "{{ item }}"
  loop:
    - /usr/local/cuda/include/cuda_runtime.h
    - /usr/include/cuda_runtime.h
  register: courseware_preflight_cuda_headers_after_distro_install
# Recompute the readiness fact from the post-install probes. When it becomes
# true, every later task guarded by `not courseware_cuda_toolkit_ready` is
# skipped.
- name: Refresh CUDA toolkit readiness after distro toolkit install
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
    - courseware_preflight_cuda_toolkit_package_available | default(false)
  vars:
    _preflight_cuda_headers_found_after: >-
      {{
        courseware_preflight_cuda_headers_after_distro_install.results
        | selectattr('stat.exists', 'equalto', true)
        | list
      }}
  set_fact:
    courseware_cuda_toolkit_ready: >-
      {{
        courseware_preflight_nvcc_after_distro_install.rc == 0
        or (_preflight_cuda_headers_found_after | length > 0)
      }}
# Best-effort removal of apt key 7fa2af80 before adding NVIDIA's repository.
# Failure is ignored; the task reports "changed" whenever apt-key exits 0.
- name: Remove legacy NVIDIA CUDA apt key when preparing WSL toolkit install
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
  become: true
  command:
    cmd: apt-key del 7fa2af80
  register: courseware_wsl_cuda_apt_key_delete
  failed_when: false
  changed_when: courseware_wsl_cuda_apt_key_delete.rc == 0
# Fetch the apt pin file from courseware_wsl_cuda_pin_url into
# courseware_wsl_cuda_pin_dest. `force: true` asks get_url to re-fetch
# rather than reuse an existing file at dest.
- name: Download NVIDIA WSL CUDA apt pin
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
  become: true
  get_url:
    url: "{{ courseware_wsl_cuda_pin_url }}"
    dest: "{{ courseware_wsl_cuda_pin_dest }}"
    force: true
    mode: "0644"
# Fetch the CUDA local-repository installer package. Runs without `become`,
# and `force: false` lets an already-downloaded file at dest be reused.
- name: Download NVIDIA WSL CUDA local installer
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
  get_url:
    url: "{{ courseware_wsl_cuda_installer_url }}"
    dest: "{{ courseware_wsl_cuda_installer_local_path }}"
    force: false
    mode: "0644"
# Install the downloaded .deb; the keyring lookup that follows searches
# courseware_wsl_cuda_repo_dir for the key file it ships.
- name: Install NVIDIA WSL CUDA local repository package
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
  become: true
  apt:
    state: present
    deb: "{{ courseware_wsl_cuda_installer_local_path }}"
# Locate the repository signing keyring (cuda-*-keyring.gpg) inside the
# repository directory; regular files only.
- name: Find NVIDIA WSL CUDA keyring
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
  become: true
  find:
    paths: "{{ courseware_wsl_cuda_repo_dir }}"
    file_type: file
    patterns: "cuda-*-keyring.gpg"
  register: courseware_wsl_cuda_keyring
# Stop when the keyring search found nothing. The length test stays last so
# it is only evaluated when the search task actually ran.
- name: Fail when NVIDIA WSL CUDA keyring is missing
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
    - (courseware_wsl_cuda_keyring.files | length) == 0
  fail:
    msg: "The NVIDIA WSL CUDA repository package was installed, but its keyring file was not found under {{ courseware_wsl_cuda_repo_dir }}."
# Copy the first keyring found (remote-to-remote) into /usr/share/keyrings,
# keeping its original file name.
- name: Copy NVIDIA WSL CUDA keyring into trusted keyrings
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
    - (courseware_wsl_cuda_keyring.files | length) > 0
  become: true
  copy:
    remote_src: true
    src: "{{ courseware_wsl_cuda_keyring.files[0].path }}"
    dest: "/usr/share/keyrings/{{ courseware_wsl_cuda_keyring.files[0].path | basename }}"
    mode: "0644"
# Install the toolkit package named by courseware_wsl_cuda_toolkit_package,
# refreshing the apt cache first.
- name: Install NVIDIA WSL CUDA toolkit
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
  become: true
  apt:
    update_cache: true
    name: "{{ courseware_wsl_cuda_toolkit_package }}"
    state: present
# Verify that nvcc exists after the NVIDIA toolkit install.
#
# PATH is tried first, then the same standard install locations that the
# initial preflight probe accepts (/usr/local/cuda/bin and
# /usr/local/cuda-*/bin). Checking PATH alone would report a successful
# install as "still missing" whenever nvcc is not on PATH.
#
# Exits 0 and prints the nvcc path when found, 1 otherwise. The rc is read by
# the following fail task; this task never fails or reports a change.
- name: Recheck CUDA compiler after WSL toolkit install
  shell:
    executable: /bin/bash
    cmd: |
      if command -v nvcc; then
        exit 0
      fi
      for candidate in /usr/local/cuda/bin/nvcc /usr/local/cuda-*/bin/nvcc; do
        if [ -x "$candidate" ]; then
          printf '%s\n' "$candidate"
          exit 0
        fi
      done
      exit 1
  register: courseware_preflight_nvcc_after_install
  changed_when: false
  failed_when: false
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
# Final gate for the WSL bootstrap: abort when the post-install nvcc recheck
# returned a non-zero rc. The rc test stays last so it is only evaluated when
# the recheck actually ran.
- name: Fail when CUDA toolkit is still missing after WSL install attempt
  when:
    - courseware_host_profile == "wsl"
    - not courseware_cuda_toolkit_ready
    - ansible_distribution == "Ubuntu"
    - ansible_architecture in ["x86_64", "amd64"]
    - courseware_preflight_nvcc_after_install.rc != 0
  fail:
    msg: "The NVIDIA WSL CUDA toolkit install completed, but `nvcc` is still missing. Verify the repository package and rerun the installer."
# Publish the interpreter and Ollama executable values as facts:
# python is an absolute path, ollama is a bare command name.
- name: Set runtime binary defaults
  set_fact:
    courseware_ollama_bin: "ollama"
    courseware_python_bin: "/usr/bin/python3"