---
# LLM-Labs-Local/ansible/roles/llama_cpp/tasks/main.yml
# (exported 2026-04-24 21:32:01 -06:00 — 166 lines, 5.3 KiB, YAML)
# Fetch llama.cpp pinned to the commit recorded in the role defaults.
# update: false leaves an existing working copy untouched on reruns, so a
# changed pin only takes effect on a fresh clone.
- name: Clone llama.cpp
  git:
    dest: '{{ courseware_repos_dir }}/llama.cpp'
    repo: 'https://github.com/ggml-org/llama.cpp.git'
    version: '{{ courseware_llama_cpp_commit }}'
    update: false
# Probe PATH for nvcc. Later tasks branch on the registered rc, so this
# probe must never fail the play or report a change.
- name: Check for CUDA compiler on Linux
  when: ansible_system == "Linux"
  command: which nvcc
  register: courseware_llama_nvcc
  failed_when: false
  changed_when: false
# Fallback probe: nvcc is often installed under /usr/local/cuda*/bin without
# being on PATH. Print the first executable candidate and exit 0, else exit 1;
# later tasks branch on the registered rc, so never fail or report a change.
- name: Check for CUDA compiler in standard install locations
  shell: |
    for candidate in /usr/local/cuda/bin/nvcc /usr/local/cuda-*/bin/nvcc; do
      if [ -x "$candidate" ]; then
        printf '%s\n' "$candidate"
        exit 0
      fi
    done
    exit 1
  args:
    # bash is needed so the unmatched glob is handled as in the original runs
    executable: /bin/bash
  register: courseware_llama_nvcc_fallback
  changed_when: false
  failed_when: false
  when: ansible_system == "Linux"
# Stat the two usual locations of cuda_runtime.h; the registered results
# feed the "toolkit missing" guard below.
- name: Check for CUDA runtime header on Linux
  when: ansible_system == "Linux"
  stat:
    path: "{{ item }}"
  register: courseware_llama_cuda_headers
  loop:
    - /usr/local/cuda/include/cuda_runtime.h
    - /usr/include/cuda_runtime.h
# Abort with remediation guidance when neither nvcc probe succeeded and no
# CUDA runtime header exists. The `when` list is AND-combined and evaluated
# in order, so the Linux guard protects the register lookups below it.
- name: Fail early when CUDA toolkit is missing on Linux/WSL
  fail:
    msg: |
      CUDA Toolkit is not installed inside this Linux environment.
      `nvidia-smi` only proves that the NVIDIA driver is visible. It does not provide the Linux-side CUDA development toolkit needed to build CUDA-enabled llama.cpp.
      If you are using WSL, this is the common split:
      - Windows side: NVIDIA driver exposes the GPU to WSL
      - Linux side: CUDA toolkit still must exist inside the distro
      Fix it, then rerun:
      bash deploy-courseware.sh
      First try:
      sudo apt update
      sudo apt install -y nvidia-cuda-toolkit
      If that package is unavailable in your distro:
      1. add NVIDIA's CUDA apt repository for your Debian/Ubuntu release
      2. install the CUDA toolkit from that repository
      Verify with:
      nvcc --version
      ls /usr/local/cuda/include/cuda_runtime.h
  when:
    - ansible_system == "Linux"
    - courseware_llama_nvcc.rc != 0
    - courseware_llama_nvcc_fallback.rc != 0
    # No header found at either standard location.
    - (courseware_llama_cuda_headers.results | selectattr('stat.exists', 'equalto', true) | list | length == 0)
# Record which nvcc to use: prefer the one found on PATH, else the hit from
# the standard-install-locations probe. The original applied `| trim` inside
# both branches of the conditional; hoisting it out is equivalent and removes
# the duplication. The selection expression itself is still repeated because
# set_fact keys cannot reference each other within the same task.
- name: Set CUDA compiler path for llama.cpp builds
  set_fact:
    courseware_llama_cuda_compiler: >-
      {{ (courseware_llama_nvcc.stdout
          if courseware_llama_nvcc.rc == 0
          else courseware_llama_nvcc_fallback.stdout) | trim }}
    # Directory containing nvcc, used to extend PATH for the build steps.
    courseware_llama_cuda_bin_dir: >-
      {{ ((courseware_llama_nvcc.stdout
           if courseware_llama_nvcc.rc == 0
           else courseware_llama_nvcc_fallback.stdout) | trim) | dirname }}
  when:
    - ansible_system == "Linux"
    - courseware_llama_nvcc.rc == 0 or courseware_llama_nvcc_fallback.rc == 0
# Always request the CUDA backend for the CMake configure step.
# NOTE(review): this is unconditional — on non-Linux systems (where all the
# CUDA probes above are skipped) it still selects -DGGML_CUDA=ON; confirm a
# non-CUDA path is handled elsewhere in this role.
- name: Set llama.cpp backend flag
  set_fact:
    courseware_llama_backend_flag: "-DGGML_CUDA=ON"
# Choose a parallel job count for the build: capped by available vCPUs, by
# RAM (one job per 8 GiB), and by an absolute ceiling of 4 — but never below
# 1, so the build always makes progress on small machines.
- name: Set llama.cpp build parallelism
  set_fact:
    courseware_llama_build_jobs: >-
      {{ [1,
          [ansible_processor_vcpus | default(ansible_processor_nproc | default(1)) | int,
           (ansible_memtotal_mb | int) // 8192,
           4] | min] | max }}
# Assemble the argv for the CMake configure step: a fixed base command plus,
# on Linux with a detected CUDA compiler, an explicit compiler override.
- name: Build llama.cpp CMake configure arguments
  set_fact:
    courseware_llama_cmake_configure_argv: "{{ _cmake_base_argv + _cmake_cuda_argv }}"
  vars:
    _cmake_base_argv:
      - cmake
      - '-S'
      - "{{ courseware_repos_dir }}/llama.cpp"
      - '-B'
      - "{{ courseware_repos_dir }}/llama.cpp/build"
      - '-DCMAKE_BUILD_TYPE=Release'
      - "{{ courseware_llama_backend_flag }}"
    # Empty unless a CUDA compiler was found by the probes above.
    _cmake_cuda_argv: >-
      {{ ['-DCMAKE_CUDA_COMPILER=' ~ courseware_llama_cuda_compiler]
         if (ansible_system == 'Linux'
             and (courseware_llama_cuda_compiler | default('') | length > 0))
         else [] }}
# Run the CMake configure step.
# FIX: the original set `CUDACXX: "{{ ... | default(omit) }}"` inside
# `environment:`. The `omit` placeholder is only honoured for module options,
# not task keywords, so when no compiler was found the literal omit
# placeholder string leaked into CUDACXX. Build the environment dict
# explicitly instead: include the CUDA entries only when a compiler exists.
- name: Configure llama.cpp
  command:
    argv: "{{ courseware_llama_cmake_configure_argv }}"
  environment: "{{ courseware_llama_configure_env }}"
  vars:
    # Prepend the CUDA bin dir to PATH and export CUDACXX only when a CUDA
    # compiler was detected on Linux; otherwise pass PATH through unchanged.
    courseware_llama_configure_env: >-
      {{ {'PATH': courseware_llama_cuda_bin_dir ~ ':' ~ ansible_env.PATH,
          'CUDACXX': courseware_llama_cuda_compiler}
         if (ansible_system == 'Linux'
             and (courseware_llama_cuda_bin_dir | default('') | length > 0))
         else {'PATH': ansible_env.PATH} }}
# Build the four llama.cpp tools the courseware uses.
# FIX: same `default(omit)`-inside-`environment:` bug as the configure step —
# `omit` only works for module options, so the placeholder string leaked into
# CUDACXX when no compiler was found. The env dict is now built explicitly.
- name: Build llama.cpp tools
  command:
    argv:
      - cmake
      - --build
      - "{{ courseware_repos_dir }}/llama.cpp/build"
      - --parallel
      - "{{ courseware_llama_build_jobs }}"
      - --target
      - llama-cli
      - llama-quantize
      - llama-perplexity
      - llama-server
  environment: "{{ courseware_llama_build_env }}"
  vars:
    # CUDA entries only when a compiler was detected on Linux; plain PATH
    # passthrough otherwise.
    courseware_llama_build_env: >-
      {{ {'PATH': courseware_llama_cuda_bin_dir ~ ':' ~ ansible_env.PATH,
          'CUDACXX': courseware_llama_cuda_compiler}
         if (ansible_system == 'Linux'
             and (courseware_llama_cuda_bin_dir | default('') | length > 0))
         else {'PATH': ansible_env.PATH} }}
# Inspect each target name in /usr/local/bin (without following symlinks) so
# the linking task below can decide whether the slot is safe to (re)use.
- name: Check system PATH slots for llama.cpp tools
  when: ansible_system == "Linux"
  stat:
    path: "/usr/local/bin/{{ item }}"
    follow: false
  register: courseware_llama_path_slots
  loop:
    - llama-cli
    - llama-quantize
    - llama-perplexity
    - llama-server
# Symlink the freshly built tools into /usr/local/bin without clobbering
# anything foreign. The `when` conditions are AND-combined and evaluated in
# order, so they short-circuit:
#   - slot missing             -> link created
#   - regular file present     -> second condition is false, task skipped
#                                 (the lnk_source lookup is never reached)
#   - symlink into our bin dir -> re-linked (effectively a no-op, force: true)
#   - symlink elsewhere        -> third condition is false, task skipped
- name: Link llama.cpp tools into /usr/local/bin
  become: true
  file:
    src: "{{ courseware_llama_cpp_bin_dir }}/{{ item.item }}"
    dest: "/usr/local/bin/{{ item.item }}"
    state: link
    force: true
  loop: "{{ courseware_llama_path_slots.results | default([]) }}"
  when:
    - ansible_system == "Linux"
    - not item.stat.exists or item.stat.islnk
    - not item.stat.exists or item.stat.lnk_source == (courseware_llama_cpp_bin_dir ~ '/' ~ item.item)