- name: Clone llama.cpp git: repo: "https://github.com/ggml-org/llama.cpp.git" dest: "{{ courseware_repos_dir }}/llama.cpp" version: "{{ courseware_llama_cpp_commit }}" update: false - name: Check for CUDA compiler on Linux command: which nvcc register: courseware_llama_nvcc changed_when: false failed_when: false when: ansible_system == "Linux" - name: Check for CUDA compiler in standard install locations shell: | for candidate in /usr/local/cuda/bin/nvcc /usr/local/cuda-*/bin/nvcc; do if [ -x "$candidate" ]; then printf '%s\n' "$candidate" exit 0 fi done exit 1 args: executable: /bin/bash register: courseware_llama_nvcc_fallback changed_when: false failed_when: false when: ansible_system == "Linux" - name: Check for CUDA runtime header on Linux stat: path: "{{ item }}" loop: - /usr/local/cuda/include/cuda_runtime.h - /usr/include/cuda_runtime.h register: courseware_llama_cuda_headers when: ansible_system == "Linux" - name: Fail early when CUDA toolkit is missing on Linux/WSL fail: msg: | CUDA Toolkit is not installed inside this Linux environment. `nvidia-smi` only proves that the NVIDIA driver is visible. It does not provide the Linux-side CUDA development toolkit needed to build CUDA-enabled llama.cpp. If you are using WSL, this is the common split: - Windows side: NVIDIA driver exposes the GPU to WSL - Linux side: CUDA toolkit still must exist inside the distro Fix it, then rerun: bash deploy-courseware.sh First try: sudo apt update sudo apt install -y nvidia-cuda-toolkit If that package is unavailable in your distro: 1. add NVIDIA's CUDA apt repository for your Debian/Ubuntu release 2. install the CUDA toolkit from that repository Verify with: nvcc --version ls /usr/local/cuda/include/cuda_runtime.h when: - ansible_system == "Linux" - courseware_llama_nvcc.rc != 0 - courseware_llama_nvcc_fallback.rc != 0 - (courseware_llama_cuda_headers.results | selectattr('stat.exists', 'equalto', true) | list | length == 0) - name: Set CUDA compiler path for llama.cpp builds set_fact: courseware_llama_cuda_compiler: "{{ (courseware_llama_nvcc.stdout | trim) if courseware_llama_nvcc.rc == 0 else (courseware_llama_nvcc_fallback.stdout | trim) }}" courseware_llama_cuda_bin_dir: "{{ ((courseware_llama_nvcc.stdout | trim) if courseware_llama_nvcc.rc == 0 else (courseware_llama_nvcc_fallback.stdout | trim)) | dirname }}" when: - ansible_system == "Linux" - courseware_llama_nvcc.rc == 0 or courseware_llama_nvcc_fallback.rc == 0 - name: Set llama.cpp backend flag set_fact: courseware_llama_backend_flag: "{{ '-DGGML_METAL=ON' if ansible_system == 'Darwin' else '-DGGML_CUDA=ON' }}" - name: Set llama.cpp build parallelism set_fact: courseware_llama_build_jobs: >- {{ [ 1, [ ansible_processor_vcpus | default(ansible_processor_nproc | default(1)) | int, ((ansible_memtotal_mb | int) // 8192) | int, 4 ] | min ] | max }} - name: Build llama.cpp CMake configure arguments set_fact: courseware_llama_cmake_configure_argv: >- {{ [ 'cmake', '-S', courseware_repos_dir ~ '/llama.cpp', '-B', courseware_repos_dir ~ '/llama.cpp/build', '-DCMAKE_BUILD_TYPE=Release', courseware_llama_backend_flag ] + ( ['-DCMAKE_CUDA_COMPILER=' ~ courseware_llama_cuda_compiler] if ansible_system == 'Linux' and (courseware_llama_cuda_compiler | default('') | length > 0) else [] ) }} - name: Configure llama.cpp command: argv: "{{ courseware_llama_cmake_configure_argv }}" environment: PATH: "{{ (courseware_llama_cuda_bin_dir ~ ':' ~ ansible_env.PATH) if ansible_system == 'Linux' and (courseware_llama_cuda_bin_dir | default('') | length > 0) else ansible_env.PATH }}" CUDACXX: "{{ courseware_llama_cuda_compiler | default(omit) }}" - name: Build llama.cpp tools command: argv: - cmake - --build - "{{ courseware_repos_dir }}/llama.cpp/build" - --parallel - "{{ courseware_llama_build_jobs }}" - --target - llama-cli - llama-quantize - llama-perplexity - llama-server environment: PATH: "{{ (courseware_llama_cuda_bin_dir ~ ':' ~ ansible_env.PATH) if ansible_system == 'Linux' and (courseware_llama_cuda_bin_dir | default('') | length > 0) else ansible_env.PATH }}" CUDACXX: "{{ courseware_llama_cuda_compiler | default(omit) }}" - name: Check system PATH slots for llama.cpp tools stat: path: "/usr/local/bin/{{ item }}" follow: false loop: - llama-cli - llama-quantize - llama-perplexity - llama-server register: courseware_llama_path_slots when: ansible_system == "Linux" - name: Link llama.cpp tools into /usr/local/bin become: true file: src: "{{ courseware_llama_cpp_bin_dir }}/{{ item.item }}" dest: "/usr/local/bin/{{ item.item }}" state: link force: true loop: "{{ courseware_llama_path_slots.results | default([]) }}" when: - ansible_system == "Linux" - not item.stat.exists or item.stat.islnk - not item.stat.exists or item.stat.lnk_source == (courseware_llama_cpp_bin_dir ~ '/' ~ item.item)