Initial commit

This commit is contained in:
c4ch3c4d3
2026-03-22 16:17:20 -06:00
commit 3bafa35460
55 changed files with 1885894 additions and 0 deletions
+4
@@ -0,0 +1,4 @@
/** @type {import("eslint").Linter.Config} */
module.exports = {
extends: ["next/core-web-vitals"],
};
+32
@@ -0,0 +1,32 @@
# dependencies
node_modules/
# next.js / build output
.next/
out/
dist/
coverage/
.turbo/
.vercel/
*.tsbuildinfo
# env
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# OS / editor noise
.DS_Store
Thumbs.db
# local archive from refactor
archive/
+45
@@ -0,0 +1,45 @@
# Notebook Conversion Site
Clean markdown-first Next.js app for browsing converted notebook labs.
## Run
1. Install dependencies:
```bash
npm install
```
2. Start the dev server:
```bash
npm run dev
```
3. Open `http://localhost:3000`.
## Project Structure
```text
content/
labs/ # .md/.mdx lab files and image asset folders
src/
app/
page.tsx # home page
labs/
page.tsx # lab index
[slug]/page.tsx # lab renderer
components/
SiteHeader.tsx
labs/LabContent.tsx
lib/
labs.ts # lab file discovery + frontmatter loader
styles/
globals.css
```
## Notes
- Labs are loaded from `content/labs`.
- Both `.md` and `.mdx` files are discoverable.
- Legacy folders from the old workspace were moved to `archive/` during refactor.
File diff suppressed because it is too large
File diff suppressed because it is too large
Binary file not shown.
+135
@@ -0,0 +1,135 @@
#!/usr/bin/env python3
"""
Convert TTPs CSV to Parquet format optimized for embedding frameworks.
Features:
- Explodes TTPs into separate rows (one TTP per row)
- Parses TTP ID and TTP Name into separate columns
- Cleans HTML entities and escapes
- Handles malformed rows gracefully
"""
import pandas as pd
import re
from pathlib import Path
# File paths
INPUT_FILE = "/Users/bzuccaro/Library/Mobile Documents/com~apple~Keynote/Documents/Notebook Conversion - Refractor/Support Files/TTPs - Full Dataset.csv"
OUTPUT_FILE = "/Users/bzuccaro/Library/Mobile Documents/com~apple~Keynote/Documents/Notebook Conversion - Refractor/Support Files/ttps_dataset.parquet"
def clean_text(text: str) -> str:
    """Clean HTML entities and normalize text."""
    if not isinstance(text, str):
        return text
    # Replace HTML entities
    replacements = {
        '&amp;': '&',
        '&gt;': '>',
        '&lt;': '<',
        '&quot;': '"',
        '&#39;': "'",
    }
    for entity, char in replacements.items():
        text = text.replace(entity, char)
    return text.strip()

def parse_ttps(ttp_string: str) -> list:
    """
    Parse a TTP string like "['T1057 - Process Discovery', 'T1569.002 - System Services']"
    into a list of dicts: [{'ttp_id': 'T1057', 'ttp_name': 'Process Discovery'}, ...]
    """
    if not isinstance(ttp_string, str) or not ttp_string.strip():
        return []
    # Remove surrounding brackets if present
    ttp_string = ttp_string.strip()
    if ttp_string.startswith("[") and ttp_string.endswith("]"):
        ttp_string = ttp_string[1:-1]
    # Parse individual TTPs
    ttps = []
    # Match pattern like 'T1057 - Process Discovery' or "T1057 - Process Discovery"
    pattern = r"['\"]?(T[\d\.]+)\s*-\s*([^'\"]+)['\"]?"
    matches = re.findall(pattern, ttp_string)
    for match in matches:
        ttp_id, ttp_name = match
        ttps.append({
            'ttp_id': ttp_id.strip(),
            'ttp_name': ttp_name.strip()
        })
    # If the regex didn't find anything, try a simpler split
    if not ttps and ',' in ttp_string:
        items = ttp_string.split(',')
        for item in items:
            item = item.strip().strip("'\"").strip()
            if ' - ' in item:
                parts = item.split(' - ', 1)
                ttps.append({
                    'ttp_id': parts[0].strip(),
                    'ttp_name': parts[1].strip()
                })
    return ttps

def main():
    print(f"Reading CSV from: {INPUT_FILE}")
    # Read CSV
    df = pd.read_csv(INPUT_FILE)
    print(f"Read {len(df)} rows with columns: {df.columns.tolist()}")
    # Clean text in Scenario column
    print("Cleaning HTML entities in Scenario column...")
    df['Scenario'] = df['Scenario'].apply(clean_text)
    # Parse TTPs and explode into separate rows
    print("Parsing TTPs and exploding to separate rows...")
    # Convert TTP column to list of dicts
    parsed_data = []
    for idx, row in df.iterrows():
        scenario = row['Scenario']
        ttp_list = parse_ttps(str(row['TTP']))
        if ttp_list:
            # Deduplicate by TTP_ID within each scenario
            seen_ids = set()
            for ttp in ttp_list:
                if ttp['ttp_id'] not in seen_ids:
                    seen_ids.add(ttp['ttp_id'])
                    parsed_data.append({
                        'Scenario': scenario,
                        'TTP_ID': ttp['ttp_id'],
                        'TTP_Name': ttp['ttp_name']
                    })
        else:
            # Keep rows with no valid TTPs, with empty TTP fields
            parsed_data.append({
                'Scenario': scenario,
                'TTP_ID': None,
                'TTP_Name': None
            })
    # Create new dataframe
    new_df = pd.DataFrame(parsed_data)
    print(f"Exploded to {len(new_df)} rows")
    # Save to parquet
    print(f"Saving to Parquet: {OUTPUT_FILE}")
    new_df.to_parquet(OUTPUT_FILE, index=False, engine='pyarrow')
    # Print stats
    print("\n=== Conversion Complete ===")
    print(f"Original rows: {len(df)}")
    print(f"Parquet rows: {len(new_df)}")
    print(f"Columns: {new_df.columns.tolist()}")
    print(f"Rows with TTP_ID: {new_df['TTP_ID'].notna().sum()}")
    print("Sample rows:")
    print(new_df.head(10).to_string())

if __name__ == "__main__":
    main()
@@ -0,0 +1,267 @@
<!-- breakout-style: instruction-rails -->
<!-- step-style: underline -->
<!-- objective-style: divider -->
# Lab 1 - Visualizing LLMs in TransformerLab
In this lab, we will:
* Download and Visualize Llama-3.2-1B-Instruct
* Visualize Tokenization & Prediction with Llama-3.2-1B-Instruct
<div class="lab-callout lab-callout--info">
<strong>Lab Flow Guide</strong><br />
<strong>Explore</strong> sections focus on observation and interpretation.<br />
<strong>Execute</strong> steps require performing actions in the lab environment.
</div>
## Objective 1: Starting TransformerLab
### Execute: Access the Lab Environment
To start Lab 1, ensure you've received a WireGuard configuration and system IP from your instructor. If you're unfamiliar with WireGuard, assistance will be provided to ensure you can access the lab environment for the duration of class.
All systems use the default username and password of `student`. All labs are located in the student home folder. To start Lab 1, run the `lab1_start.sh` script in the `lab1` folder:
```bash
~/lab1/lab1_start.sh
```
Lastly, if necessary, you can `su -` to root at any time. No password is required.
## Objective 2: Visualizing a LLM
### Explore: Understand the Model and Runtime
The next steps will guide us through the process of deploying and interacting with a pre-trained LLM, `Llama-3.2-1B-Instruct`. To do this, we'll be utilizing an inference engine, software designed to execute LLM models and generate token predictions. You'll encounter models packaged in the **GGUF** format, a file format designed for efficient storage and loading of quantized LLMs, enabling them to run on a wider range of hardware. Don't worry if these terms are new to you; the specifics of inference engines and the details of **GGUF** quantized LLMs will be thoroughly explained in the following section of this course.
Normally, we would first need to install an **inference engine** capable of running **GGUF** files.
### Execute: Verify the FastChat Plugin
Navigate to **Plugins**, and in the search bar type `Fastchat`. Note that it has already been installed for you!
<figure style="text-align: center;">
<a href="https://imgur.com/9Waj8VG.png" target="_blank">
<img
src="https://imgur.com/9Waj8VG.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Plugins
</figcaption>
</figure>
<br>
### Execute: Find and Load `Llama-3.2-1B-Instruct`
Next, navigate to **Model Registry**. You should see `Llama-3.2-1B-Instruct` right away on your screen, but if not, search for this model using the search bar.
<figure style="text-align: center;">
<a href="https://i.imgur.com/UyWdnMR.png" target="_blank">
<img
src="https://i.imgur.com/UyWdnMR.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Model Registry Selection.
</figcaption>
</figure>
<br>
Once downloaded, select **Foundation** and our newly downloaded `Llama-3.2-1B-Instruct` model.
<figure style="text-align: center;">
<a href="https://i.imgur.com/Aez94RU.png" target="_blank">
<img
src="https://i.imgur.com/Aez94RU.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Model Selection
</figcaption>
</figure>
<br>
Once selected, click **Run**. Give TransformerLab a moment to successfully load the model.
<figure style="text-align: center;">
<a href="https://i.imgur.com/f4YcA8P.png" target="_blank">
<img
src="https://i.imgur.com/f4YcA8P.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Starting a Model
</figcaption>
</figure>
<br>
### Explore: Inspect the Architecture View
To start, let's navigate to the **Interact** page, and then select **Model Architecture** from the Chat dropdown.
<figure style="text-align: center;">
<a href="https://i.imgur.com/X0CM31h.png" target="_blank">
<img
src="https://i.imgur.com/X0CM31h.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Model Architecture Dropdown
</figcaption>
</figure>
<br>
This page allows us to visualize the actively loaded model, in this case our downloaded `Llama-3.2-1B-Instruct`. This interactive view is equivalent to the greatly simplified version shown on the slide "Transformation: Multilayer Perceptron" from our lecture. We can explore this view as follows:
* Hold down both the right and left mouse buttons and drag to move the entire model.
* Hold down just the left mouse button to rotate the view.
<figure style="text-align: center;">
<a href="https://i.imgur.com/8hXTGlt.png" target="_blank">
<img
src="https://i.imgur.com/8hXTGlt.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Model Visualization
</figcaption>
</figure>
<br>
### Explore: Interpret Layers, Blocks, and Parameters
Each layer of the model performs a specific task, taking the input provided and transforming it toward the statistically most likely completion of the text, token by token. This build of Llama 3.2 1B is made up of 372 **layers**. Each layer transforms the output of the layer above it until, eventually, we end up with the statistically most likely completion.
You have likely also noticed that the colors repeat. Each set of repeating **layers** is organized into **blocks**. Each **block** is a grouping of **layers** that perform the same functions, but with a slightly different focus. For example, one **block** may focus on nouns, and another may focus on adjectives, and so on.
The **layers** within Llama 3.2 1B are as follows:
<ul class="concept-pill-list">
<li>
<span class="concept-pill-label">Attention:</span>
<span>Focuses the model on specific parts of an input sequence to more accurately predict the next token.</span>
</li>
<li>
<span class="concept-pill-label">Weights:</span>
<span>The core learnable parameters of the network.</span>
</li>
<li>
<span class="concept-pill-label">Biases:</span>
<span>Additional parameters added after the weighted sum to shift (transform) the output.</span>
</li>
<li>
<span class="concept-pill-label">Scale:</span>
<span>Normalizes the output of previous <strong>layers</strong> to prepare the next round of transformation.</span>
</li>
</ul>
Each of these **layers** also has a different type, corresponding to Q, K, V, and much more. The **layers** between the small "Attention" **layers** are all considered to make up a single "block."
To the side, we can see the actual number values of each weight within each layer.
Fundamentally, the LLM itself is this stack of numbers. Those numbers let us transform tokenized input (such as English text) into useful output. The more **layers** and **blocks** a model has, the bigger it is, and the more accurately and "intelligently" it will behave. This 1B-parameter model is incredibly small, however, so the "truthfulness" of generated predictions is likely to be suspect (i.e., hallucinated). The model will at least sound very confident!
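To make "confidence" concrete, here is a toy sketch of the final step every LLM performs: turning a vector of raw scores (logits) into a probability distribution over candidate next tokens. The numbers and words are illustrative only, not taken from the real model.

```python
import numpy as np

# Toy logits for four candidate next tokens (illustrative values only).
logits = np.array([4.0, 2.0, 1.0, 0.5])
probs = np.exp(logits - logits.max())
probs /= probs.sum()                      # softmax -> probabilities
for token, p in zip(["jumps", "runs", "sleeps", "pizza"], probs):
    print(f"{token:>7}: {p:.2%}")         # higher probability = more confident
```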
<br>
---
## Objective 3: Tokenization & Prediction with Llama-3.2-1B-Instruct
### Execute: Interactive Chat
Let's next move on to active conversation with the model. Navigate to the **Chat** tab from the dropdown menu.
<figure style="text-align: center;">
<a href="https://i.imgur.com/e40Jrku.png" target="_blank">
<img
src="https://i.imgur.com/e40Jrku.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Select Chat
</figcaption>
</figure>
<br>
Once loaded, feel free to type any message and interact with the model in any way. To speed up the pace of our lab, I recommend setting your maximum output length to 64 tokens.
<figure style="text-align: center;">
<a href="https://i.imgur.com/MdAIKLn.png" target="_blank">
<img
src="https://i.imgur.com/MdAIKLn.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Maximum Length - 64
</figcaption>
</figure>
<br>
If text generation fails, or acts weird (such as merely repeating your input back to you), unload and reload the model using the previous Foundation screen from the last Objective.
### Execute: View Tokenization
If everything is in working order, review the **Tokenize** view. This lets us see how Llama 3.2 converts our input text into "tokens," the numbers that represent the input English. Feel free to input any sentence into the box to review the final tokenized version (a short code sketch after the figure shows the same idea programmatically).
<figure style="text-align: center;">
<a href="https://i.imgur.com/I9tU8jK.png" target="_blank">
<img
src="https://i.imgur.com/I9tU8jK.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Tokenize View
</figcaption>
</figure>
<br>
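If you'd like to reproduce this outside the UI, here is a minimal sketch using the Hugging Face `transformers` tokenizer. This is an assumption on our part: any Llama 3.2 tokenizer works, and the gated `meta-llama/Llama-3.2-1B-Instruct` repository requires an approved HuggingFace login.

```python
# Minimal tokenization sketch; assumes `transformers` is installed and you
# have access to the (gated) meta-llama/Llama-3.2-1B-Instruct repository.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
ids = tok.encode("The quick brown fox")
print(ids)                              # the numeric token IDs
print(tok.convert_ids_to_tokens(ids))   # the text piece each ID stands for
```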
### Execute: Visualize Next-Token Activations
Next, select **Model Activations**. By entering "The quick brown fox" and selecting **Visualize**, we can see how the model selects the next word, along with the model's level of confidence. Feel free to redo this process with alternative sentences.
<figure style="text-align: center;">
<a href="https://i.imgur.com/JeWpoqV.png" target="_blank">
<img
src="https://i.imgur.com/JeWpoqV.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Next Word Prediction
</figcaption>
</figure>
<br>
### Execute: Compare Confidence Views
Note how confident the model is about the word "jumps" in this famous phrase. For an alternative view of the same output, you can also select the **Visualize Logprobs** option from the menu, which shows the same information encoded by color.
<figure style="text-align: center;">
<a href="https://i.imgur.com/PvkgQUr.png" target="_blank">
<img
src="https://i.imgur.com/PvkgQUr.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Green is Confident. Red is less confident.
</figcaption>
</figure>
<br>
### Explore: Continue Exploring TransformerLab Features
Please continue to explore TransformerLab until you're ready to move on. While we will utilize many tools other than TransformerLab throughout this course due to its beta nature, this software is improving all the time and is worth watching! TransformerLab supports many advanced features, in various stages of development, such as:
* Batch Text Generation
* LLM Fine Tuning
* LLM Evaluation
* Retrieval Augmented Generation (RAG)
We will discuss these topics and more throughout the course.
<br>
---
## Conclusion
In this lab, we observed the foundational concepts of all LLMs in action using TransformerLab. Through hands-on exploration, we watched the process of tokenization (how text is converted into numerical representations for the model) and visualized the model's prediction process, including its confidence levels for different token selections. By navigating the model's layers and blocks, we gained an appreciation for the sheer scale and complexity inherent in modern LLMs.
This initial experience provides a crucial stepping stone for further exploration of LLMs, laying the groundwork for future labs focused on fine-tuning, evaluation, and advanced techniques like Retrieval Augmented Generation.
+543
@@ -0,0 +1,543 @@
<!-- breakout-style: instruction-rails -->
<!-- step-style: underline -->
<!-- objective-style: divider -->
# Lab 2 - LLaMa.cpp, Ollama & Quantization
In this lab, we will:
* Download a model from huggingface.com and quantize it for llama.cpp
* Download a model from huggingface.com and infer it in llama.cpp
* Download a model from ollama.com
* Download a custom model from huggingface.com
* Import a custom model into Ollama.
<div class="lab-callout lab-callout--info">
<strong>Lab Flow Guide</strong><br />
<strong>Explore</strong> sections focus on investigation and comparison.<br />
<strong>Execute</strong> sections require running commands and producing output.
</div>
## Objective 1: HuggingFace & LLaMa.cpp
### 1. What Is LLaMa.cpp?
LLaMa.cpp is an open-source project created to enable efficient running of Meta's LLaMA (Large Language Model Meta AI) family of large language models on consumer-grade hardware. It was initially developed by **Georgi Gerganov** in early March 2023, shortly after Meta released the weights of the LLaMA models to approved researchers.
The project's original goal was to make LLaMA models accessible on systems without powerful GPUs, including laptops, desktops, and even mobile devices. **LLaMa.cpp** achieves this by implementing the LLaMA inference in pure C/C++ and introducing highly efficient quantization techniques, allowing models to run with drastically reduced memory requirements. **LLaMa.cpp** is also the underlying project behind a number of inference wrappers and technologies, such as Llamafile, LM Studio, and Ollama, amongst many others.
### Key Features
| Capability | Why it matters |
|------------|----------------|
| **Efficient local inference** | Runs large language models without a powerful GPU. |
| **Quantization tools** (`llama-quantize`) | Shrinks model size (down to 1-bit) while preserving usable performance. |
| **Model conversion to .GGUF** | Provides a compact, fast-loading format that works with Ollama, LM Studio, and other wrappers. |
| **Cross-platform support** | Works on Linux, macOS, Windows, Apple Silicon, and ARM devices. |
| **CLI and debugging utilities** (`llama-cli`, `gguf-dump.py`) | Enables quick interactive testing and inspection of model metadata. |
| **Perplexity measurement** (`llama-perplexity`) | Quantifies how confident the model is about its predictions. |
| **Active community** | Powers tools such as LM Studio, Llamafile, and Ollama. |
---
## 1.2 Explore: HuggingFace - Model Cards
[HuggingFace](https://huggingface.com) is the "GitHub" for LLMs, datasets, and more. The following steps walk you through locating Meta's **LLaMA 3.2 1B** model card and its files.
1. **Open the LLaMA 3.2 1B page**
<https://huggingface.co/meta-llama/Llama-3.2-1B>
<br>
2. **Read the model card**: note the description, license, tags (e.g., *Text Generation*, *SafeTensors*, *PyTorch*), and links to fine-tunes/quantizations.
<br>
3. **Navigate to "Quantizations."**
This tab lists community-created quantizations, including GGUF, GPTQ, AWQ, and EXL3 versions. Common providers include **Bartowski**, **Unsloth**, and **NousResearch**, although these players change periodically. Additionally, note that we can often download quantized versions *without* having agreed to the Meta license restrictions for the original model.
<figure style="text-align:center;">
<a href="https://i.imgur.com/Po0Ll3o.png" target="_blank">
<img src="https://i.imgur.com/Po0Ll3o.png" width="800" style="border:5px solid black;">
</a>
<figcaption>Model card "Quantizations" convenience link</figcaption>
</figure>
<br>
<figure style="text-align:center;">
<a href="https://i.imgur.com/NM1rbXV.png" target="_blank">
<img src="https://i.imgur.com/NM1rbXV.png" width="800" style="border:5px solid black;">
</a>
<figcaption>Model Quantization Options</figcaption>
</figure>
4. **Open “Files and versions.”**
Here you see the raw `.safetensors` files (the unquantized checkpoint). For the model to run successfully, the full set of files needs to be loaded into system memory. Note how this 1B-parameter model is small enough to fit comfortably in a phone's memory, even raw.
<figure style="text-align:center;">
<a href="https://i.imgur.com/6I9zkeu.png" target="_blank">
<img src="https://i.imgur.com/6I9zkeu.png" width="800" style="border:5px solid black;">
</a>
<figcaption>Distribution Restriction</figcaption>
</figure>
Unless you've accepted Meta's EULA for this model, you'll be unable to download the model directly from Meta. This view may or may not appear based on your own HuggingFace account.
## 1.3 Explore: HuggingFace - Find and Download WhiteRabbitNeo
For this lab we will work with **WhiteRabbitNeo-V3-7B**, a cybersecurity-oriented fine-tune of Qwen2.5-Coder-7B. This model is less popular than Llama 3.2, and if we'd like to run this model in Ollama, we'll need to perform our own quantization.
<div class="lab-callout lab-callout--warning">
<strong>Warning:</strong> Although the next two steps show how to find and download this model so you can replicate the process, support files are already provided in <code>/home/student/lab2/WhiteRabbitNeo</code> to speed up lab execution.
</div>
### 1. Locate & download the model
1. Go to <https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B>.
2. Points of interest on this model card:
1. This model appears to be a fine-tune of **Qwen2.5-Coder-7B**.
2. This model is openly licensed and does not have any requirements to download and use for our purposes.
3. This model is in **Safetensors** format, which is compatible with **LLaMa.cpp**'s quantization tools.
<figure style="text-align:center;">
<a href="https://i.imgur.com/9GrHRuh.png" target="_blank">
<img src="https://i.imgur.com/9GrHRuh.png" width="800" style="border:5px solid black;">
</a>
<figcaption>WhiteRabbitNeo model card.</figcaption>
</figure>
3. Click **Files and versions** → review the `.safetensors` checkpoints (≈ 15GB at **FP16**).
<figure style="text-align:center;">
<a href="https://i.imgur.com/Emx97nL.png" target="_blank">
<img src="https://i.imgur.com/Emx97nL.png" width="800" style="border:5px solid black;">
</a>
<figcaption>Model safetensors (size ≈ 15GB).</figcaption>
</figure>
### 2 Download the Model
To prepare this model, create a working folder anywhere on your system. Once created, perform the following:
1. Ensure you have git and git-lfs installed to enable successful cloning from HuggingFace. If necessary, both can be installed on Debian-based distributions via:
```bash
sudo apt install git git-lfs
git lfs install
```
2. Clone the model:
```bash
git clone https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B
```
### 3 Execute: Convert the Downloaded Model
**LLaMa.cpp** makes it easy for us to package models downloaded in SafeTensors format to GGUF. We can convert the model with the following official project script command:
```bash
python3 convert_hf_to_gguf.py /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B/WhiteRabbitNeo-V3-7B --outfile /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B.gguf
```
### 4 Execute: Review Model Metadata
When these steps have completed, you should see a new WhiteRabbitNeo-V3-7B.gguf file. We have not yet quantized the model, merely converted it to a format usable by **LLaMa.cpp** for the next steps. We can tell if this process was successful by using the included **gguf-dump.py** script that is packaged with **LLaMa.cpp**.
Run the following command:
```bash
gguf-dump /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B.gguf
```
We should then see:
<figure style="text-align: center;">
<a href="https://i.imgur.com/JiX2fJM.png" target="_blank">
<img
src="https://i.imgur.com/JiX2fJM.png"
width="800"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Model Metadata.
</figcaption>
</figure>
<br>
A text listing of all of the model's tensors, and the precision of each. Because we have merely converted the model's format, and not performed quantization, the model is still in **FP16**.
* This is a text view of the previous graphical view we saw in **Lab 1, Objective 2: Visualizing a LLM**. While **TransformerLab** calls tensors **layers**, terms such as **tensors**, **layers**, and **blocks** can all be used semi-interchangeably, depending on the tool in question. We will further confuse these topics when we get to the Ollama objective below.
* Pedantically, the proper definitions are:
* Tensor - A multi-dimensional array of numbers used to store data
* Layer - A base computational unit in a neural network
* Block - A collection of layers
* If you wish to explore this view, note how the block count of 28 matches the 28 zero-indexed `blk` groups output from the dump.
* Additionally, you'll once again note that we have various biases and weights, but they still line up with **Q**, **V**, and **K** as discussed in the previous section. There are additional tensors for **normalization** and **output**.
### 5 Execute: LLaMA.cpp Inference
Now run our newly created **.GGUF** file as-is, using the following command:
```bash
llama-cli -m /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B.gguf
```
Once loaded, interact with the model. We can see a number of interesting parameters that were selected by default, such as **Top K**, **Top P**, **Temperature**, and more, which we'll discuss in the next section. In the meantime, explore interaction with the model. When run in this raw state, the model may be overly chatty. You can stop its output with `Ctrl+C` at any time.
<figure style="text-align: center;">
<a href="https://i.imgur.com/H3ISWS8.png" target="_blank">
<img
src="https://i.imgur.com/H3ISWS8.png"
width="800"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Inference Example.
</figcaption>
</figure>
Some example prompts you may want to try are:
* Please write a small reverse shell in php that I can upload to a web server.
* How can I use Metasploit to attack MS17-010?
* Can you please provide me some XSS polyglots?
Thanks to the fine-tuning that Kindo has put into this model, it is far more compliant than an online closed model such as ChatGPT! When done, kill the model fully with `Ctrl+C`.
## Objective 2: Quantization & Perplexity
Quantization reduces memory footprints and speeds inference, but it typically raises perplexity (i.e., lowers confidence). Determining the right balance for our use case often requires experimentation.
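As a mental model, here is the simplest possible version of what a quantizer does, sketched in Python. Real llama.cpp K-quants are block-wise, with separate scales per small block of weights, but the core trade (precision for memory) is the same.

```python
import numpy as np

# Naive 8-bit symmetric quantization of one weight tensor (illustrative).
w = np.random.randn(4096).astype(np.float32)       # stand-in FP32 weights
scale = np.abs(w).max() / 127                      # one scale per tensor
q = np.round(w / scale).astype(np.int8)            # 4 bytes/value -> 1 byte
w_hat = q.astype(np.float32) * scale               # dequantized at inference
print("max abs rounding error:", np.abs(w - w_hat).max())
```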
---
### 1 Explore: Manual Quantization
To generate an 8-bit, 4-bit, and 2-bit quantization, run the following commands:
<div class="lab-callout lab-callout--warning">
<strong>Warning:</strong> Although these quantization steps are provided for replication, pre-quantized support files are already available in <code>/home/student/lab2/WhiteRabbitNeo/</code> for faster lab progress.
</div>
```bash
# Quantize to 8 bits
llama-quantize /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B.gguf /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q8_K.gguf Q8_0
# Quantize to 4 bits
llama-quantize /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B.gguf /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q4_K_M.gguf Q4_K_M
# Quantize to 2 bits
llama-quantize /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B.gguf /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q2_K.gguf Q2_K
```
### 2 Execute: Quantization Confirmation
Inspect the quantized files with the following command:
```bash
gguf-dump /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q4_K_M.gguf
```
Review how the various layers are quantized to different levels of precision. It turns out that even K-quants actually utilize multiple quantization levels on different tensor layers to improve performance!
<figure style="text-align: center;">
<a href="https://i.imgur.com/kur4TPj.png" target="_blank">
<img
src="https://i.imgur.com/kur4TPj.png"
style="width: 800; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em; color: var(--text-color);">
WhiteRabbitNeo Layer 0.
</figcaption>
</figure>
<br>
<details>
<summary style="font-weight:bold; color:#a94442; cursor:pointer;">
Full explanation for the brave...
</summary>
### What each Tensor Layer does
### **1. Token Embeddings**
- **Tensor 1: `token_embd.weight`**
- **Responsibility:** Maps each token in the vocabulary to a dense vector of size 3584.
---
### **2. Layer Normalization**
- **Tensor 2: `blk.0.attn_norm.weight`**
- **Responsibility:** Scales the normalized inputs to the self-attention mechanism in the first block.
- **Tensor 6: `blk.0.ffn_norm.weight`**
- **Responsibility:** Scales the normalized outputs of the feed-forward network (FFN) in the first block.
---
### **3. Feed-Forward Network (FFN)**
- **Tensor 3: `blk.0.ffn_down.weight`**
- **Responsibility:** Projects the FFN's expanded hidden activation from dimension 18944 back down to 3584 (the down-projection).
- **Tensor 4: `blk.0.ffn_gate.weight`**
- **Responsibility:** Projects the input from dimension 3584 up to 18944 and acts as the gating branch of the FFN's non-linear transformation.
- **Tensor 5: `blk.0.ffn_up.weight`**
- **Responsibility:** Projects the input from dimension 3584 up to 18944; its output is multiplied element-wise by the activated gate branch before the down-projection.
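To see how the three FFN tensors work together, here is a hedged numpy sketch of the gated feed-forward pattern (SwiGLU-style) used by this model family. The random matrices stand in for the real trained weights, and the dimensions are scaled down to keep the demo light.

```python
import numpy as np

def silu(x):
    # SiLU/Swish activation used by this model family
    return x / (1 + np.exp(-x))

# Real dims from the dump are 3584 -> 18944; scaled down 10x here.
d_model, d_ff = 358, 1894
x = np.random.randn(d_model)
W_gate = np.random.randn(d_ff, d_model) * 0.02   # blk.N.ffn_gate.weight
W_up   = np.random.randn(d_ff, d_model) * 0.02   # blk.N.ffn_up.weight
W_down = np.random.randn(d_model, d_ff) * 0.02   # blk.N.ffn_down.weight

h = silu(W_gate @ x) * (W_up @ x)   # gated expansion to the hidden dim
y = W_down @ h                      # down-projection back to d_model
print(y.shape)                      # (358,)
```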
---
### **4. Self-Attention Mechanism**
#### **Key Projection**
- **Tensor 7: `blk.0.attn_k.bias`**
- **Responsibility:** Adds a learnable offset to the key vectors in the self-attention mechanism.
- **Tensor 8: `blk.0.attn_k.weight`**
- **Responsibility:** Projects the input to dimension 512 for key vectors in the self-attention mechanism.
#### **Query Projection**
- **Tensor 10: `blk.0.attn_q.bias`**
- **Responsibility:** Adds a learnable offset to the query vectors in the self-attention mechanism.
- **Tensor 11: `blk.0.attn_q.weight`**
- **Responsibility:** Projects the input to dimension 3584 for query vectors in the self-attention mechanism.
#### **Value Projection**
- **Tensor 12: `blk.0.attn_v.bias`**
- **Responsibility:** Adds a learnable offset to the value vectors in the self-attention mechanism.
- **Tensor 13: `blk.0.attn_v.weight`**
- **Responsibility:** Projects the input to dimension 512 for value vectors in the self-attention mechanism.
#### **Attention Output Projection**
- **Tensor 9: `blk.0.attn_output.weight`**
- **Responsibility:** Projects the concatenated attention outputs back to dimension 3584 before residual connection.
---
### **Summary by Purpose**
- **Token Embeddings:** Maps tokens to dense vectors.
- **Layer Normalization:** Scales normalized inputs/outputs in attention and FFN blocks.
- **Feed-Forward Network (FFN):** Handles down-projection, gating, and up-projection for non-linear transformations.
- **Self-Attention Mechanism:** Manages key, query, value projections, biases, and output projection for attention computations.
</details>
### 3 Execute: Quantitatively Measuring Perplexity
Perplexity is a measurement of how confident the model is about its next-token predictions. Somewhat counterintuitively, lower values indicate higher confidence. By asking the model to infer a relatively large input (minimum 1024 tokens), we can generate an average perplexity score to gauge the model's confidence.
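Under the hood, perplexity is just the exponential of the average negative log-probability the model assigned to each token of the test file. A small sketch with made-up log-probabilities:

```python
import math

# Perplexity from per-token log-probabilities (illustrative values).
logprobs = [-0.9, -1.7, -0.3, -2.2, -0.6]   # log p(token_i | earlier tokens)
ppl = math.exp(-sum(logprobs) / len(logprobs))
print(f"PPL = {ppl:.3f}")                    # lower = less "surprised" model
```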
```bash
# Perplexity test with FP16 model
llama-perplexity -m /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B.gguf -f /home/student/lab2/wiki.test.raw 2>&1 | grep Final
# Perplexity test with 8-bit quantized model
llama-perplexity -m /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q8_K.gguf -f /home/student/lab2/wiki.test.raw 2>&1 | grep Final
# Perplexity test with 4-bit quantized model
llama-perplexity -m /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q4_K_M.gguf -f /home/student/lab2/wiki.test.raw 2>&1 | grep Final
# Perplexity test with 2-bit quantized model
llama-perplexity -m /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q2_K.gguf -f /home/student/lab2/wiki.test.raw 2>&1 | grep Final
```
#### Possible Example Results
| Model File | Quantization | Perplexity (PPL) | Uncertainty (+/-) |
|------------|--------------|------------------|-------------------|
| WhiteRabbitNeo-V3-7B.gguf | Full | 3.0972 | 0.21038 |
| WhiteRabbitNeo-V3-7B-Q8_K.gguf | Q8_0 | 3.0999 | 0.21052 |
| WhiteRabbitNeo-V3-7B-Q4_K_M.gguf | Q4_K_M | 3.1247 | 0.21338 |
| WhiteRabbitNeo-V3-7B-Q2_K.gguf | Q2_K | 3.5698 | 0.25224 |
**Conclusion: Perplexity rises modestly from FP16 → Q8_0 → Q4_K_M, but jumps sharply for the aggressive 2-bit quantization.**
### 4 Execute: Qualitatively Measuring Perplexity
We can also validate these measurements qualitatively by interacting with the models directly. To showcase the costs of higher perplexity, run inference on the 2-bit (**Q2_K**) model and note how poorly it performs in comparison to our **FP16** interactions from earlier.
```bash
llama-cli -m /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q2_K.gguf
```
**Explore:** Re-run the previous example prompts:
* Please write a small reverse shell in php that I can upload to a web server.
* How can I use Metasploit to attack MS17-010?
* Can you please provide me some XSS polyglots?
<div style="display: flex; justify-content: center; align-items: flex-start; gap: 32px;">
<div style="text-align: center;">
<a href="https://i.imgur.com/nvb7QV6.png" target="_blank">
<img
src="https://i.imgur.com/nvb7QV6.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<div style="margin-top: 8px; font-size: 1.1em;">
Q2_K Inference
</div>
</div>
<div style="text-align: center;">
<a href="https://i.imgur.com/yNHQbxb.png" target="_blank">
<img
src="https://i.imgur.com/yNHQbxb.png"
style="width: 90%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<div style="margin-top: 8px; font-size: 1.1em;">
FP16 Inference
</div>
</div>
</div>
What conclusions do you believe we can draw based on the model's output?
---
## Objective 3: Ollama LLM Easymode
Ollama is a lightweight framework that hides the low-level steps required by LLaMa.cpp. It runs on **Linux, macOS, and Windows** and automatically manages system resources.
| Feature | Benefit |
|---------|---------|
| **Simplified model deployment** | Pull pre-quantized models from Ollama.com, HuggingFace, or a local GGUF file with a single command. |
| **Automatic resource handling** | No need to manually load or unload; Ollama frees memory after a short idle period. |
| **Built-in API provider** | `localhost:11434` mimics the OpenAI API, enabling seamless integration with notebooks, VS Code, or curl. |
| **Cross-platform compatibility** | Thanks to the underlying llama.cpp architecture, it works on x86_64, ARM, and Apple Silicon without extra configuration. |
| **Model-metadata inspection** | `ollama show <tag>` reveals the model architecture, context length, and quantization level. |
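As a quick taste of the built-in API, the sketch below sends one chat request to a local Ollama instance using only the Python standard library. It assumes Ollama is running on the default port and that the `llama3.2` model has already been pulled.

```python
import json
import urllib.request

# One chat request to a local Ollama server via its OpenAI-compatible route.
payload = {
    "model": "llama3.2",
    "messages": [{"role": "user", "content": "Say hello in five words."}],
}
req = urllib.request.Request(
    "http://localhost:11434/v1/chat/completions",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)
print(body["choices"][0]["message"]["content"])
```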
### 1 Execute: Pull and Run a Pre-Built Model from Ollama.com
Let's start by downloading Meta's Llama 3.2 3B, the "big brother" to the small model we've continuously worked with so far. The Ollama project and community have made this exceptionally easy for us to accomplish.
1. **Open the Ollama registry**: visit <https://ollama.com> in your browser.
2. **Search for the model**
<figure style="text-align: center;">
<a href="https://i.imgur.com/VBvOGty.png" target="_blank">
<img
src="https://i.imgur.com/VBvOGty.png"
style="width: 800; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Ollama Search.
</figcaption>
</figure>
<br>
3. **Copy the `ollama run` command** that appears in the top-right corner of the model card.
4. **Paste the command into your terminal** and press **Enter**:
```bash
ollama run llama3.2
```
<figure style="text-align: center;">
<a href="https://i.imgur.com/ammtbmI.png" target="_blank">
<img
src="https://i.imgur.com/ammtbmI.png"
style="width: 800; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Ollama Run command.
</figcaption>
</figure>
<br>
### 2 Explore: Interacting with Ollama Inference
When finished, you will be presented with a prompt, similar to the `llama-cli` commands. No need to download, convert, or quantize! Feel free to interact with this model until you're ready to move on.
<figure style="text-align: center;">
<a href="https://i.imgur.com/XZ6OYNI.png" target="_blank">
<img
src="https://i.imgur.com/XZ6OYNI.png"
style="width: 800; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Ollama Inference.
</figcaption>
</figure>
<br>
### 3 Execute: Pull and Run a Pre-Built Model from HuggingFace.com
Similarly, we can do the same by pulling a model directly from **HuggingFace**. As long as the source file is a .gguf of any quantization level that fits within our system memory, Ollama can fetch it directly.
1. **Select a quantized model from Objective 1**: visit [CodeIsAbstract](https://huggingface.co/CodeIsAbstract/Llama-3.2-1B-Q8_0-GGUF) in your browser.
2. **Use this model**: click **Use this model** → choose the Ollama tab. The page displays a ready-to-run command:
<figure style="text-align: center;">
<a href="https://i.imgur.com/lg2INAs.png" target="_blank">
<img
src="https://i.imgur.com/lg2INAs.png"
style="width: 800; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
HuggingFace Direct Ollama Pull.
</figcaption>
</figure>
<br>
3. **Copy the command** and execute it in your terminal.
```bash
ollama run hf.co/CodeIsAbstract/Llama-3.2-1B-Q8_0-GGUF:Q8
```
4. **Explore:** Interact with the model as normal.
### 4 Execute: Load a Custom `.gguf` Model
We can also import our WhiteRabbitNeo **.GGUF** model into Ollama without having to upload it to **HuggingFace** first. To do so, we need to create a **Modelfile**, a plain-text file that tells **Ollama** where the **.GGUF** is located, along with any additional defaults we'd like Ollama to use when performing inference (a fuller example appears at the end of this objective).
1. **Create a simple Modelfile**: this tells Ollama where the model lives.
```bash
echo "FROM /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q4_K_M.gguf" > Modelfile
```
2. **Register the model with Ollama**
```bash
ollama create WhiteRabbitNeo -f Modelfile
```
3. **Run the newly registered model**
```bash
ollama run WhiteRabbitNeo
```
4. **Explore:** The model is now stored locally under the tag *WhiteRabbitNeo* and can be invoked just as any other model.
<figure style="text-align: center;">
<a href="https://i.imgur.com/ijsAl6m.png" target="_blank">
<img
src="https://i.imgur.com/ijsAl6m.png"
style="width: 800; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Importing WhiteRabbitNeo V3.
</figcaption>
</figure>
<br>
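For reference, a Modelfile can carry more than just the `FROM` line. Below is a sketch of a richer Modelfile using documented directives; the parameter values are illustrative choices of ours, not Kindo's recommendations.

```text
FROM /home/student/lab2/WhiteRabbitNeo/WhiteRabbitNeo-V3-7B-Q4_K_M.gguf
PARAMETER temperature 0.7
PARAMETER num_ctx 4096
SYSTEM "You are a concise cybersecurity assistant."
```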
---
#### Additional Useful Ollama Commands
| Command | Description |
|---------|-------------|
| `ollama list` | Shows all models currently registered with Ollama. |
| `ollama rm <tag>` | Deletes the specified model (freeing disk space). |
| `ollama show <tag>` | Prints model metadata (architecture, context length, quantization). |
| `ollama serve` | Starts the OpenAI-compatible API server (runs automatically when you first use `ollama run`). |
<br>
---
## Conclusion
Ollama bridges the gap between low-level LLaMa.cpp tools and high-level usability, making it an ideal choice for rapid deployment and educational labs. By leveraging its API, model registry, and automation features, you can focus on experimentation rather than infrastructure. However, understanding LLaMa.cpp's underlying mechanics (e.g., quantization, perplexity) remains critical for optimizing performance or going off the beaten path.
<br>
---
Binary file not shown.
Binary file not shown.
+331
@@ -0,0 +1,331 @@
<!-- breakout-style: instruction-rails -->
<!-- step-style: underline -->
<!-- objective-style: divider -->
# Lab 3 - Open WebUI & Prompting
In this lab, we will:
* Access Open WebUI in our lab environment
* Use an Ollama model within Open WebUI
* Experiment with inference parameters
* Experiment with prompting techniques
<div class="lab-callout lab-callout--info">
<strong>Lab Flow Guide</strong><br />
<strong>Explore</strong> sections focus on investigation and comparison.<br />
<strong>Execute</strong> sections require running steps and validating output.
</div>
## Objective 1 Execute: Accessing Open WebUI
Your lab machine has been pre-installed with Open WebUI. It is accessible on your provided system IP at port 8080 (`http://<IP>:8080`). You can log in with the following default credentials:
* Username: `student@openwebui.com`
* Password: `student`
Once you've successfully connected to Open WebUI, follow the registration instructions. Feel free to register with any information, as the Kaggle instance will tear itself down after four hours (barring manual intervention or inactivity). Once successful, move on to the next objective.
<figure style="text-align: center;">
<a href="https://i.imgur.com/QrQwWuD.jpeg" target="_blank">
<img
src="https://i.imgur.com/QrQwWuD.jpeg"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Initial Registration
</figcaption>
</figure>
<br>
## Objective 2 Execute: Downloading Our First Model through Open WebUI (OUI)
Locate, pull, and run **Gemma 3 4B IT QAT** (a quantization-aware-trained model) using the **Open WebUI** interface that talks to Ollama. By the end of this section you should be able to start a model with a single click and generate a response in the UI.
### Execute: Download Gemma 3 4B (IT QAT)
1. **Open the Ollama model registry**
* Go to <https://ollama.com> in your web browser.
* Locate the search box at the top of the page.
<figure style="text-align:center;">
<a href="https://i.imgur.com/yQ9KMsa.png" target="_blank">
<img src="https://i.imgur.com/yQ9KMsa.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
</a>
<figcaption>Ollama homepage: use the search bar to look for "Gemma3".</figcaption>
</figure>
2. **Find the Gemma 3 family**
* Type **`Gemma 3`** and press **Enter**.
* The results page lists several parameter sizes (1B → 27B).
3. **Navigate to the list of tags**
* Click the **`Tags`** link beneath the model description.
<figure style="text-align:center;">
<a href="https://i.imgur.com/NgcM7qx.png" target="_blank">
<img src="https://i.imgur.com/NgcM7qx.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
</a>
<figcaption>Tag view: each entry shows the model size and a short description.</figcaption>
</figure>
4. **Select the 4B QAT variant**
* Locate the **`gemma3:4b-it-qat`** tag in the table.
* The size column reads **`3.4GB`**, a rough indication of the memory required for inference.
<figure style="text-align:center;">
<a href="https://i.imgur.com/nDPlOdd.png" target="_blank">
<img src="https://i.imgur.com/nDPlOdd.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
</a>
<figcaption>Model size for `gemma3:4b-it-qat` (≈ 3.4GB).</figcaption>
</figure>
5. **Copy the model tag**
* Click the **copy-to-clipboard** icon next to the tag (or highlight the text and press **Ctrl+C**).
6. **Open the Open WebUI interface**
* In a new browser tab, navigate to the URL where your OpenWebUI instance is running (e.g., `http://localhost:8080`).
7. **Pull the model through the UI**
* In the **“Select a model”** dropdown, paste the copied tag into the text field.
* Click **`Pull`**. The UI will display a progress bar while Ollama downloads the GGUF file.
<figure style="text-align:center;">
<a href="https://i.imgur.com/q4lXXtR.jpeg" target="_blank">
<img src="https://i.imgur.com/q4lXXtR.jpeg" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
</a>
<figcaption>Open WebUI: paste the tag and press "Pull".</figcaption>
</figure>
8. **Verify the model works**
* Once the download finishes, type a prompt in the chat window (e.g., “Tell me a short, funny story about a cat that learns to code”).
* Press **Enter** and watch the response appear.
<figure style="text-align:center;">
<a href="https://i.imgur.com/S73sewR.png" target="_blank">
<img src="https://i.imgur.com/S73sewR.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
</a>
<figcaption>Successful inference: the model returns a coherent answer.</figcaption>
</figure>
---
## Objective 3: Inference Settings
### Explore: OUI Inference Parameter Valves
Prior to this lab, we discussed inference settings such as Top K, Top P, and Temperature. Let's quickly review the most common settings to customize (a short sketch after this list shows how they combine during sampling):
* `Context Length` - The number of tokens the model is allowed to keep in active memory
* `Temperature` - Rescales token scores; higher values make low-probability tokens more likely to be generated
* `Top K` - Limits token selection during inference to the `K` most likely candidates
* `Top P` - Limits candidates to the smallest set of tokens whose cumulative probability exceeds `P`
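Here is a small illustrative sketch of how these three knobs can interact inside a sampler. This is a simplification; real inference engines apply the filters in configurable orders, with additional penalties.

```python
import numpy as np

def sample(logits, temperature=0.8, top_k=40, top_p=0.9):
    """Illustrative temperature -> top-k -> top-p sampling pipeline."""
    logits = np.asarray(logits, dtype=np.float64) / temperature
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    order = np.argsort(probs)[::-1][:top_k]   # keep the K most likely tokens
    kept, total = [], 0.0
    for i in order:                           # smallest set with mass >= P
        kept.append(i)
        total += probs[i]
        if total >= top_p:
            break
    p = probs[kept] / probs[kept].sum()       # renormalize the survivors
    return int(np.random.choice(kept, p=p))

print(sample([2.5, 2.0, 1.0, 0.2, -1.0]))     # index of the sampled token
```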
Open WebUI allows us to easily modify these parameters on the fly through the chat controls, found on the right hand side next to your user's icon.
<figure style="text-align: center;">
<a href="https://i.imgur.com/2sVULuh.png" target="_blank">
<img
src="https://i.imgur.com/2sVULuh.png"
style="width: 600px; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Chat Controls
</figcaption>
</figure>
<br>
By default, Open WebUI selects the following generically sound options, with the expectation that users have access to modest hardware:
* `Context Length` - 4096
* `Temperature` - 0.8
* `Top K` - 40
* `Top P` - 0.9
While we won't play with `Context Length`, this parameter is critical for successfully accomplishing more complicated tasks using local models. With only the small default context length value, the model will quickly forget your instructions and interactions, rendering the results the model generates less useful. Unfortunately, just increasing this value is not always an option, as your selected model + `Context Length` must fit within your available memory. As with many challenges in AI, a key to solving issues with `Context Length` is often scaling your hardware to meet the demands of the task. This generally means utilizing hardware with larger amounts of VRAM or unified memory either by purchasing it or renting access.
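Why does context cost memory? Here is a back-of-the-envelope sketch of KV-cache growth. The architecture numbers are assumptions chosen to resemble a Llama-3.2-1B-style model (16 layers, 8 KV heads, head dimension 64, FP16 values) purely for illustration.

```python
# Back-of-the-envelope KV-cache memory for a small Llama-style model.
# Architecture numbers below are illustrative assumptions.
layers, kv_heads, head_dim, bytes_per = 16, 8, 64, 2
for ctx in (4096, 32768, 131072):
    kv_bytes = 2 * layers * kv_heads * head_dim * ctx * bytes_per  # K and V
    print(f"{ctx:>7} tokens -> {kv_bytes / 2**20:,.0f} MiB of KV cache")
```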
Additionally, these defaults can be overruled by the Ollama model file, which can specify its own "preferred" defaults. Below are the defaults that come with the model we've downloaded; you can also interactively explore the `params` page for the model at this link: [gemma3:12b-it-qat](https://ollama.com/library/gemma3:12b-it-qat/blobs/3116c5225075).
<figure style="text-align: center;">
<a href="https://i.imgur.com/CA8Fdl4.png" target="_blank">
<img
src="https://i.imgur.com/CA8Fdl4.png"
style="width: 600px; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Modelfile Defaults
</figcaption>
</figure>
<br>
The best model makers will often override the generic defaults with their own preferred ones, as we've just seen. These Google-selected defaults were the values they found to produce the best outputs for most tasks. When possible, you'll likely want to stick with these defaults unless you have a very good reason to change them.
Thankfully, our lab gives us just such a reason! We can manually modify these options with the aforementioned chat controls. Depending on our end goal, we can help the model write more "creatively" or more "precisely" by setting `Temperature`, `Top K`, and `Top P`.
Let's test this with a series of interactions themed around Magic: The Gathering. Gemma is a multi-modal model, meaning we're not limited to inputting just text! Input the following image, and ask `What is this? What does it do?`
Next, set our inference parameters to the following:
* `Temperature` - 1.1
* `Top K` - 100
* `Top P` - 0.95
Repeat your first interaction, noting the differences in model output. Less likely, less common words should hopefully have been selected!
When satisfied, let's next set our inference parameters to the following:
* `Temperature` - 2
* `Top K` - 400
* `Top P` - 0.95
This time the model has likely gone off the rails, answering for an extended period of time and trailing off incoherently. This is because we increased the likelihood of improbable tokens far beyond the performance thresholds Google has set for us. Let's next test the opposite:
* `Temperature` - Default
* `Top K` - 1
* `Top P` - Default
Feel free to continue exploring with other topics or images. Note how each time we restart our conversation, the model gives us the exact same answer. This is because Top K = 1 limits the model to selecting only the single most likely token for the provided input! Even with this restriction, note that the model can still produce different answers due to GPU differences, random fluctuations in the hardware, or other similarly improbable events. Never forget that LLMs are not fully deterministic; even when highly restricted, they can output unexpected results.
<br>
---
## Objective 4: Prompting Techniques
### Explore: Prompt Engineering
Next, let's review different ways we can coax a model into performing better without fine-tuning or parameter customization. We can do this by "priming" the model with our first prompt in a number of ways:
* Few Shot Prompting - Providing examples of our desired outcome up front
* Meta Prompting - Providing a guide to reach the desired outcome
* Chain of Thought - Providing the model guidance to think through its response
* Self Criticism - Asking the model to play "devil's advocate" against itself
Each of these tools can be combined to help achieve a greater effect. Below is a suggested list of Magic: The Gathering game-design challenges which we can task Gemma 3 with, but each will require either some luck or great prompt engineering (a sample few-shot framing for the first challenge appears after the list). If you have a different topic you're more familiar with, feel free to first use the model to adapt these challenges to a more familiar theme:
* Design a black rare creature card that fits thematically and mechanically into a Graveyard Matters set. Provide a few existing cards to help give the model a template.
* Design the same card, but this time outline the type, mechanics, tone, and identity
* Invent a new keyword. Have the model reason step by step how the keyword will work within the game
* Review your new keyword for game balance. Have the model challenge its decisions.
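For the first challenge, a few-shot framing might look like the sketch below. The example cards are illustrative inventions for this lab, not real card text, and the format mirrors the card-outline structure used later in this objective.

```text
Design a black rare creature card that fits thematically and mechanically into
a "Graveyard Matters" set. Here are two examples of the structure and power
level I want:

Name: Gravebreaker Ghoul
Mana Cost: {2}{B}{B}
Type Line: Creature — Zombie
Power/Toughness: 4/3
Abilities: When Gravebreaker Ghoul enters the battlefield, return target
creature card from your graveyard to your hand.

Name: Cryptkeeper Fiend
Mana Cost: {1}{B}
Type Line: Creature — Horror
Power/Toughness: 2/1
Abilities: Whenever a creature card leaves your graveyard, each opponent
loses 1 life.

Now design a new card in the same format.
```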
### Explore: System Prompting
There is one final prompting tool that we have yet to explore in depth: system prompting. While the `chat controls` menu provides the option to override the default system prompt, Open WebUI provides a powerful flow for "creating" new models with saved system prompts and inference parameters. This is especially useful once we have crafted a system prompt we particularly like, or would like to set inference parameters once and reuse them many times.
Let's create a new model by selecting the `Workspace` link, and then selecting the `+` button to create a new model:
<figure style="text-align: center;">
<a href="https://i.imgur.com/z502Ox2.png" target="_blank">
<img
src="https://i.imgur.com/z502Ox2.png"
style="width: 600px; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Create custom model
</figcaption>
</figure>
<br>
In the new model window, we can customize many different options for our model, even beyond the previously used chat-specific controls. Create a new model named `Gemma 3 LLM Demo` by performing the following steps:
1. Set the name to `Gemma 3 LLM Demo`
2. Set the Base Model to `gemma3:4b-it-qat`
3. Provide a system prompt. You can set this to any task you'd like the model to focus on, or we can stick with our Magic: The Gathering theme. Utilize the following prompt, or for bonus points, have the model generate one for you.
```text
"You are a creative designer for Magic: The Gathering, tasked with generating new Sliver creature cards. Follow these guidelines to ensure the cards align with the game's mechanics and lore:
Card Outline Structure:
* Name: Give the Sliver a unique name that reflects its abilities or traits (e.g., 'Predatory Sliver', 'Aetherwing Sliver').
* Mana Cost: Assign a mana cost appropriate for the card's power level and complexity. Use standard Magic symbols (e.g., {1}{G}{U}).
* Type Line: Always include 'Creature — Sliver' in the type line.
* Power/Toughness: Set values that balance the card's abilities.
* Abilities: Include one or more keyword abilities, triggered abilities, or static effects. Ensure they synergize with existing Sliver mechanics.
* Flavor Text (optional): Add a short, thematic quote or description to enhance the card's lore.
Sliver Mechanics:
Slivers are a tribe of creatures that share abilities among themselves. Include the phrase 'All Slivers have...' in the ability text to reflect this tribal synergy.
Abilities should be consistent with existing Sliver themes, such as combat enhancements, adaptability, or swarm tactics.
Balance and Creativity:
Ensure the card is balanced for gameplay while introducing innovative mechanics or flavor.
Example:
Name: Swiftwing Sliver
Mana Cost: {2}{W}
Type Line: Creature — Sliver
Power/Toughness: 2/2
Abilities: Flying, All Slivers have flying.
Flavor Text: 'The skies belong to the swift and the bold.'
When provided a name, generate a new Sliver card following this structure."
```
<figure style="text-align: center;">
<a href="https://i.imgur.com/fIz6Zx8.png" target="_blank">
<img
src="https://i.imgur.com/fIz6Zx8.png"
style="width: 600px; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
System Prompt Creation
</figcaption>
</figure>
<br>
4. To ensure only the best card generation, show the `Advanced Params` and set the following to add creativity:
* `Temperature` - 1.1
* `Top K` - 100
* `Top P` - 0.7
Note: While we haven't actively discussed them as a part of this lab, as you play with more advanced inference problems, you may also find the following parameters of interest:
* `Max Tokens` - Limit the possible length of a response to the desired number of tokens
* `num_gpu` - Manually override Ollama's built-in layer-offload determination. Useful for increasing performance on mixed GPU setups.
* `use_mlock` - Manually force Ollama to ensure all model components are kept within active memory. Useful for smaller systems.
<figure style="text-align: center;">
<a href="https://i.imgur.com/fsLK1zY.png" target="_blank">
<img
src="https://i.imgur.com/fsLK1zY.png"
style="width: 600px; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Custom Parameters
</figcaption>
</figure>
<br>
5. When done, hit **Save**. We can now test creating new Sliver cards! Select our newly created model from the chat dropdown, and try inventing a few names.
<br>
---
## Conclusion
Throughout this lab, we've explored the fascinating world of Open WebUI and prompt engineering. Let's summarize the key topics we've covered:
1. **Open WebUI Setup**: We learned how to set up and run Open WebUI in both Google Colaboratory and locally using Docker containers. This gave us hands-on experience with deploying LLM interfaces.
2. **Model Selection and Management**: We explored how to download and manage models like Qwen 3.5, understanding their resource requirements and capabilities. This taught us about the practical considerations of working with different model sizes.
3. **Inference Parameters**: We experimented with critical inference parameters including:
- Temperature: Controls randomness in output
- Top K: Limits token selection to top K most likely options
- Top P: Uses nucleus sampling based on cumulative probability
4. **Prompting Techniques**: We examined various prompting strategies:
- Few Shot Prompting: Providing examples of desired outputs
- Meta Prompting: Giving the model guidance on how to approach a task
- Chain of Thought: Encouraging step-by-step reasoning
- Self Criticism: Having the model evaluate its own responses
5. **System Prompting**: We created custom models with specific system prompts and parameter settings, learning how to tailor LLM behavior for specialized tasks.
These concepts are foundational for effectively working with large language models in real-world applications. Remember that prompt engineering is both an art and a science - it requires understanding both the capabilities of the model and the nuances of human language. As you continue your journey with LLMs, don't hesitate to experiment with different approaches and parameters to find what works best for your specific use cases.
@@ -0,0 +1,186 @@
<!-- breakout-style: instruction-rails -->
<!-- step-style: underline -->
<!-- objective-style: divider -->
# Lab 4 - Embedding and Chunking
In this lab, we will:
* Explore various chunking strategies
* Explore how embeddings & vectors allow similar concepts to "cluster" together within N-Dimensional spaces
* Explore a functional RAG application
Let's get started!
## Objective 1 Explore: Chunking Strategy
Chunking is the first step in any RAG pipeline. Chunking is the process of dividing our document into snippets that can then be stored within a database, paired with an embedded representation of that data. Because chunking occurs so early within the RAG process, the strategy chosen to create chunks of a document proves critical to the eventual embeddings which will be stored.
Successful chunking is hyper specific to the kinds of documents we wish to chunk. In production-level RAG pipeline development, we'd likely run a number of strategies against documents we've analyzed for quality, bucketing them into different processing pipelines. However, we can at least get a rough idea of what effects chunking will have with a basic visualization.
First, ensure we've started our lab:
```bash
~/lab1/lab4_start.sh
```
And then, in a web browser, navigate to http://<STUDENT ASSIGNED SYSTEM IP>:3000. Once loaded, you should see the ChunkViz homepage.
<figure style="text-align: center;">
<a href="https://i.imgur.com/PG6fp1V.png" target="_blank">
<img
src="https://i.imgur.com/PG6fp1V.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
ChunkViz Default Page
</figcaption>
</figure>
<br>
Already, ChunkViz is populated with some example text. Additionally, the text has already been "chunked" according to a default, character-based splitting strategy. In this case, every 200 characters is considered one chunk. We can modify chunk sizes by playing with the "Chunk Size" and "Chunk Overlap" sliders. Try changing those to 256 & 20, respectively.
<figure style="text-align: center;">
<a href="https://i.imgur.com/9SDyh7I.png" target="_blank">
<img
src="https://i.imgur.com/9SDyh7I.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Chunk Size & Overlap
</figcaption>
</figure>
<br>
Note how the colors in the text below dynamically change. Each color is a single chunk, with the "green" between each unique color representing the overlap. This overlap helps to increase the likelihood that any given chunk will be properly selected.
Next, let's explore different chunking strategies. The major ones that we will cover are:
| Strategy | Description |
|---|---|
| Character Splitter | The default view; splits text into fixed-size chunks by raw character count. |
| Token Splitter | Splits chunks based on token boundaries (tokenization done by **tiktoken**). |
| Sentence Splitter | Split chunks into rough sizes based on the interpretation of what is a "sentence". |
| Recursive Character | Split chunks based on multiple possible separators, such as new lines (`\n`), periods (`.`), commas (`,`), or other relevant language section signifiers. |
Select each option, and observe some peculiarities in how ChunkViz breaks text into chunks.
<figure style="text-align: center;">
<a href="https://i.imgur.com/jWY4nOd.png" target="_blank">
<img
src="https://i.imgur.com/jWY4nOd.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Chunking Strategies
</figcaption>
</figure>
<br>
Each strategy comes with its own unique benefits and drawbacks. Character-based splitting is often the easiest strategy to implement, since all text input is ultimately just a sequence of characters. Token-based splitting is useful when consistency in chunk size is imperative. Sentence & recursive splitting are often better at preserving "complete thoughts", as humans usually, but not always, write in complete sentences.
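To make the sliding-window idea behind these strategies concrete, here is a minimal sketch of character- and token-based splitting (the sizes are arbitrary, and `tiktoken` is assumed to be installed for the token variant):
```python
# Minimal sketch: fixed-size chunking with overlap, by characters or tokens.
def chunk_by_characters(text: str, chunk_size: int = 256, overlap: int = 20) -> list[str]:
    """Slide a fixed-size window across the text, overlapping each step."""
    step = chunk_size - overlap
    return [text[start : start + chunk_size] for start in range(0, len(text), step)]

def chunk_by_tokens(text: str, chunk_size: int = 256, overlap: int = 20) -> list[str]:
    """Same idea, but window sizes are measured in tokens via tiktoken."""
    import tiktoken
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    step = chunk_size - overlap
    return [
        enc.decode(tokens[start : start + chunk_size])
        for start in range(0, len(tokens), step)
    ]

sample = "Chunking is the process of dividing a document into snippets. " * 20
print(len(chunk_by_characters(sample)), "character chunks")
```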
Let's explore one more facet of chunking, this time looking at how chunking presents itself against a novel. Open your provided copy of "Blindsight" by Peter Watts in `.txt` format, and paste the contents into ChunkViz. Once again, play with the sliders (chunk sizes anywhere from 64 up to 1024) and strategies. Note how different chunk sizes split the novel in different ways.
<figure style="text-align: center;">
<a href="https://i.imgur.com/M51ASNK.png" target="_blank">
<img
src="https://i.imgur.com/M51ASNK.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Chapter 1 - 1024 Chunks, Recursive Character. This strategy nicely breaks paragraphs up.
</figcaption>
</figure>
<br>
Imagine how easy or difficult it may be to retrieve a specific piece of information depending on chunk size!
## Objective 2 Explore: Embedding Space
Now that we've seen some of the different trade-offs when chunking, we can move to the next major step of a RAG pipeline: embedding. As discussed during lecture, embedding is the process of converting text into a numerical representation that captures the "meaning" of the content. Instead of treating text as raw strings, embedding models map each chunk into an N-dimensional space where semantically similar content clusters closer together.
This allows the system to perform similarity search efficiently: when a user submits a query, the query is also embedded into the same vector space, and the system retrieves the chunks whose embeddings are closest to the query's. This is in contrast to how embedding vectors are utilized within an LLM itself, i.e., for attention and transformation via the feed-forward network. Ultimately, this step is what enables a RAG system to move beyond simple keyword matching and instead retrieve information based on meaning and context.
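As a minimal sketch of that retrieval step, assuming the `sentence-transformers` package and its small `all-MiniLM-L6-v2` model (the scenario snippets here are invented for illustration):
```python
# Minimal sketch: embed chunks and a query, then retrieve the nearest chunk.
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
chunks = [
    "The attacker enumerates running processes on the host.",
    "Malware is scheduled to execute via a new system service.",
    "The actor archives collected files before exfiltration.",
]
chunk_vecs = model.encode(chunks, normalize_embeddings=True)

query = "What did the adversary do with the files it gathered?"
query_vec = model.encode([query], normalize_embeddings=True)[0]

# With normalized vectors, cosine similarity reduces to a dot product.
scores = chunk_vecs @ query_vec
print(chunks[int(np.argmax(scores))])  # the semantically closest chunk
```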
Let's explore a real embedding space. Navigate to http://<STUDENT ASSIGNED SYSTEM IP>:5055. Here, we've started a project called Embedding Atlas. Embedding Atlas is a tool that provides interactive visualizations for datasets in parquet format; each "chunk" in this case is one row in the dataset. It allows us to visualize, cross-filter, and search embeddings and metadata in an interactive, manual way.
<figure style="text-align: center;">
<a href="https://i.imgur.com/8PvcZBP.png" target="_blank">
<img
src="https://i.imgur.com/8PvcZBP.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Embedding Atlas Flow Diagram
</figcaption>
</figure>
<br>
The lab4_start.sh script will have automatically started Embedding Atlas, as well as performed embedding against each "Scenario" in our dataset. Scenarios in this case are 1-3 sentence snippets describing an action taken by an attacker.
<figure style="text-align: center;">
<a href="https://i.imgur.com/9bGQce8.png" target="_blank">
<img
src="https://i.imgur.com/9bGQce8.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Embedding Atlas CLI (Backend, EXAMPLE ONLY)
</figcaption>
</figure>
<br>
Our Embedding Atlas has already been pre-loaded with the main dataset we'll be using throughout the rest of today. Specifically, this is a dataset that matches "hacker scenarios" with MITRE ATT&CK Tactic, Technique, and Procedure (TTP) IDs. If you're unfamiliar with ATT&CK, it is a project that attempts to categorize and organize the possible ways a hacker might execute malware, pivot throughout a network, and eventually act on their objectives (often ransomware). ATT&CK also provides us with a rich corpus of example data that we can use to visualize the embedding process.
To help us visualize groups more clearly, before we start, please be sure to select "TTP_Name" from the dropdown in the top left.
<figure style="text-align: center;">
<a href="https://i.imgur.com/996ukgZ.png" target="_blank">
<img
src="https://i.imgur.com/996ukgZ.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
TTP_Name Grouping
</figcaption>
</figure>
<br>
Each color is a semantically similar concept, as defined by the embeddings generated during text processing. We can dynamically explore this embedding space through a few options:
1. Select one of the text categories on the right side. This will visually show only the entries that fall under that category.
2. Alternatively, select any of the category values in the right-hand column. This performs the same function, exclusively showing only entries for the relevant ID.
Note: You can use your mouse wheel to zoom in and out. Additionally, click and drag the map with left click to center any areas you deem of interest.
<figure style="text-align: center;">
<a href="https://i.imgur.com/YkSqT4v.png" target="_blank">
<img
src="https://i.imgur.com/YkSqT4v.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Single Visible Category - System Information Discovery
</figcaption>
</figure>
<br>
Explore how the various categories naturally cluster together within the embedding space. If we were to use this embedding space as part of a RAG pipeline, an LLM could embed the words in our query in a similar manner and surface the semantically similar ideas within our dataset back to us.
Let's visualize similarity in one other way:
3. Select any single dot, and click "Nearest Neighbor". Embedding Atlas will show us the specific datapoints that embed closest to our selected datapoint. Notice how some of the nearest datapoints appear very distant on the map! Think about why this might be; we'll discuss it during the lab review.
<figure style="text-align: center;">
<a href="https://i.imgur.com/zKa6GxD.png" target="_blank">
<img
src="https://i.imgur.com/zKa6GxD.png"
style="width: 50%; display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
</a>
<figcaption style="margin-top: 8px; font-size: 1.1em;">
Nearest Neighbors
</figcaption>
</figure>
<br>
If you'd like to continue to explore alternative datasets and see how embeddings can flexibly cluster raw data, feel free to take a look at [Embedding Atlas' Examples Page](https://apple.github.io/embedding-atlas/examples/). In particular, take a look at the Wine dataset until class resumes.
## Objective 3 Explore: Full RAG Exploration
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,481 @@
<!-- breakout-style: instruction-rails -->
<!-- step-style: underline -->
<!-- objective-style: divider -->
# Lab 5 - Dataset Generation and Fine Tuning
In this lab, we will:
* Explore public datasets
* Generate a dataset with Kiln.ai
* Fine-tune Gemma3 with LLaMA Factory
## Objective 1 Explore: Public Datasets
While fine-tunes may not have the same level of impact as in the early days of LLMs, they can still provide hyper-specialized capabilities, enabling small LLMs such as those we've used throughout the course to compete with large, closed LLMs such as ChatGPT and Gemini: for use cases where data needs to stay private, where the costs of a closed model are too high, or where we want a model focused on a specific RAG dataset.
There are multiple ways to generate a useful dataset, including but not limited to:
| # | Method | Typical use case | Key advantage |
|---|--------|-----------------|----------------|
| 1 | **Manual data collection** | Surveys, interviews, domain-expert annotation | Highest specificity; fully controlled quality |
| 2 | **Web scraping** | Harvesting public articles, forum posts, code snippets | Scalable; leverages existing web content |
| 3 | **APIs & databases** | Accessing structured resources (e.g., Wikipedia API, PubMed) | Structured data; often well-documented |
| 4 | **Crowdsourcing** | Large-scale labeling (e.g., image bounding boxes) | Cost-effective for repetitive tasks |
| 5 | **Data augmentation** | Expanding a small set of images or text | Improves diversity without new collection |
| 6 | **Public datasets** | Ready-made corpora from repositories like HuggingFace | Immediate availability; often preprocessed |
| 7 | **Synthetic data generation** | Simulated sensor readings, procedurally generated text | Useful when real data is scarce or sensitive |
Let's at least quickly touch on option 6, **Public Datasets**. While they may vary in quality, they're a great way to jumpstart a particular focus for a fine-tune. Many are found on https://huggingface.co/datasets, where over 400k datasets are readily accessible for many different tasks, from many different providers, including [OpenAI](https://huggingface.co/datasets/openai/gsm8k), [Nvidia](https://huggingface.co/datasets/nvidia/Nemotron-CrossThink), and more. Much like with models, there are numerous tools we can utilize to filter these datasets, such as by format, modality, or license.
<figure style="text-align: center;">
<img
src="https://i.imgur.com/kdnBCyL.png"
width="600"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<figcaption style="margin-top: 8px; font-size: 1.1em; ">
Example Datasets.
</figcaption>
</figure>
#### Explore a dataset (GSM8K)
Navigate to [GSM8K](https://huggingface.co/datasets/openai/gsm8k). Much like how models have **model cards**, datasets have **dataset cards**. These perform a similar job, providing:
1. Tags
2. Example data & a *Data Studio* button for interacting with the dataset on **HuggingFace** directly.
3. Easy Download Links (although we can also use `git clone`)
4. The Description
<figure style="text-align: center;">
<img
src="https://huggingface.co/datasets/openai/gsm8k/resolve/main/docs/assets/gsm8k-card.png"
width="600"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<figcaption style="margin-top: 8px; font-size: 1.1em; ">
Dataset Card Contents.
</figcaption>
</figure>
At the heart of each dataset is the pairing of *input* and *result*. In the case of math, this is relatively easy, as these are quite literally *question* and *answer* pairs for math problems.
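If you'd like to pull a dataset down programmatically rather than through the site, a minimal sketch with the Hugging Face `datasets` package (installation assumed) looks like this:
```python
# Minimal sketch: download GSM8K and inspect one question/answer pair.
from datasets import load_dataset

gsm8k = load_dataset("openai/gsm8k", "main", split="train")
example = gsm8k[0]
print(example["question"])
print(example["answer"])
```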
Larger datasets, such as [Fineweb](https://huggingface.co/datasets/HuggingFaceFW/fineweb), utilize more complicated structures, but all still fundamentally follow this same principle. In the case of [Fineweb](https://huggingface.co/datasets/HuggingFaceFW/fineweb), the inputs are titles and summaries of web pages, with links to the precise web page as scraped from the internet. Feel free to explore a subset of this **15 Trillion Token** dataset below:
<div style="text-align: center; width: 100%;">
<iframe
src="https://huggingface.co/datasets/HuggingFaceFW/fineweb/embed/viewer/sample-10BT/train"
frameborder="0"
width="100%"
height="600px"
style="max-width: 100%; border: 1px solid #ddd; border-radius: 4px;"
></iframe>
</div>
#### Open-weight vs. open-source
One last note on public datasets. A common misconception is that *open-weight* models are **open source**.
- **Open-weight** models (e.g., Gemma, DeepSeek-R1, Qwen) provide publicly released checkpoints but **do not** include permissive source-code licenses.
- **True open-source** LLMs remain rare; the only notable example at the time of writing is **INTELLECT-2**, which was built via a distributed "SETI@Home-style" effort.
Unfortunately, **INTELLECT-2** does not compare favorably to existing *open-weight* models such as **Gemma**, **DeepSeek-R1**, **Qwen**, or other bleeding-edge models. When using these *open-weight* models for corporate purposes, review the license!
<br>
---
## Objective 2: Synthetic Dataset Generation
If you can, I strongly encourage you to try to find ready-made or easily massaged datasets that do not require synthetic data. You'll often obtain better results with less effort this way. After all, the original frontier ChatGPT family of models simply scraped the entire internet (every book, scientific paper, and other "pre-made" raw data) to help generate their first dataset. However, this is often unrealistic, as at minimum we need **1000** input-output pairs in order to begin fine-tuning, so...
### Why Use Synthetic Data?
| Reason | Explanation |
|--------|-------------|
| **Data scarcity** | Niche domains (e.g., MITRE ATT&CK classification) often lack ≥ 1000 labeled examples. |
| **Scalability** | A single large model can produce thousands of examples in minutes, saving manual effort. |
| **Quality control** | By generating with a *larger* model than the target (e.g., Gemma3 12B-qat → Gemma3 4B), you can distill richer responses within specific domains. |
| **Iterative refinement** | Kiln lets you rate or repair each pair, turning noisy outputs into a clean training set. |
<div class="lab-callout lab-callout--warning">
<strong>Rule of Thumb:</strong> Never generate data with a model that is smaller than the model you plan to fine-tune.
</div>
---
### 1. Install & Launch Kiln AI
If you haven't yet, download [Kiln AI](https://github.com/Kiln-AI/Kiln) and run the installer for your OS.
1. **Open Kiln**. It should automatically go to `http://localhost:3000` in your browser.
2. Click **`Get Started`**.
<figure style="text-align:center;">
<img src="https://i.imgur.com/hJNehuE.png" width="400"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Welcome screen; click "Get Started".</figcaption>
</figure>
3. Choose **`Continue`** (or **`Skip Tour`** if you prefer).
4. Dismiss the newsletter prompt (optional).
Kiln is now ready for configuration.
### 2. Connect Kiln to Ollama
1. In Kiln's left-hand **Providers** panel, click **`Connect`** under the Ollama entry.
<figure style="text-align:center;">
<img src="https://i.imgur.com/vEwUszl.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Connect to a local or remote Ollama instance.</figcaption>
</figure>
2. Click **`Continue`** to confirm the connection.
<div class="lab-callout lab-callout--info">
<strong>Tip:</strong> If you have access to a commercial LLM (for example, OpenAI GPT-4o), you can point Kiln to that endpoint for higher-quality synthetic data by replacing the Ollama URL in <strong>Providers → Connect</strong>.
</div>
---
### 3. Create a Kiln Project
1. Kiln will prompt you to **Create a Project**. Enter any descriptive name (e.g., `MITRE-ATTACK-Fine-Tune`).
<figure style="text-align:center;">
<img src="https://i.imgur.com/8CLEp9s.png" width="400"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Name your project.</figcaption>
</figure>
2. Press **`Create`**. You are now inside the project workspace.
---
### 4. Define the Fine-Tuning Task
1. Click **`Add Task`** and fill out the form with the details below.
* **Task name:** `ATT&CK Classification`
* **Goal:** "Fine-tune Gemma-3-4B so it can map a textual scenario to the correct MITRE ATT&CK technique."
* **System prompt (auto-filled):** Kiln will prepend this text to every generation request.
<figure style="text-align:center;">
<img src="https://i.imgur.com/43o2s0Y.png" width="400"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Task definition screen.</figcaption>
</figure>
2. Click **`Save Task`**. The task now appears in the left-hand **Tasks** list.
---
### 5. Kiln Main Interface Overview
| Sidebar item | Primary use |
|--------------|------------|
| **Run** | Manually generate one input-output pair at a time (useful for quick checks). |
| **Dataset** | View, edit, export, or import the entire collection of pairs. |
| **Synthetic Data** | Bulk-generate pairs using a model of your choice. |
| **Evals** | Run automatic evaluation against a held-out test set. |
| **Settings** | Project-level configuration (e.g., default model, output format). |
When you first open a project, Kiln lands on the **Run** page.
---
### 6. Manual Generation (Run Page)
1. In the **Run** view, set the parameters as shown below (you may substitute a larger model if your hardware permits).
<figure style="text-align:center;">
<img src="https://i.imgur.com/vvW0wjk.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Configure the Run settings.</figcaption>
</figure>
2. Type a **scenario description** (e.g., "An attacker dumps LSASS memory using Mimikatz") and click **`Run`**.
3. Kiln sends the prompt to the selected Ollama model (by default `gemma3:12b-it-qat`).
4. When the model returns an answer, you can **rate** it from 1 ★ to 5 ★.
   * *5 ★* → Accept and click **`Next`**.
   * *< 5 ★* → Click **`Attempt Repair`**, edit the response, then **`Accept Repair`** or **`Reject`**.
<figure style="text-align:center;">
<img src="https://i.imgur.com/wqVsYMk.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Rate a correct response with 5 ★.</figcaption>
</figure>
5. Repeat until you have a handful of high-quality pairs. This manual step is optional but useful for seeding the dataset with "gold-standard" examples.
---
### 7. Bulk Synthetic Data Generation
#### 7.1 Open the Generator
1. In the sidebar, click **`Synthetic Data` → `Generate Fine-Tuning Data`**.
<figure style="text-align:center;">
<img src="https://i.imgur.com/l6OiUeP.png" width="600"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Enter the bulk-generation workflow.</figcaption>
</figure>
#### 7.2 Generate Top-Level Topics
1. Click **`Add Topics`**. This will generate top level topics that follow broad MITRE ATT&CK categories.
2. Choose **`gemma3:12b-it-qat`** (or any larger model you prefer).
3. Set **Number of topics** to **8** and click **`Generate`**.
<figure style="text-align:center;">
<img src="https://i.imgur.com/e6MvhSj.png" width="400"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Select model & number of topics.</figcaption>
</figure>
4. Review the generated list. Delete any unsatisfactory topics (hover → click the trash icon) or click **`Add Topics`** again to generate more. Alternatively, if additional depth is required, click **`Add Subtopics`** to drill down deeper into any of the high-level topics created by Gemma initially.
<figure style="text-align:center;">
<img src="https://i.imgur.com/wHNv3Om.png" width="800"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Final set of 8 topics.</figcaption>
</figure>
#### 7.3 Create Input Scenarios for All Topics
1. With the topics selected, click **`Generate Model Inputs`**. Ensure **`gemma3:12b-it-qat`** is still chosen, and then affirm your selection.
Kiln now asks the model to produce a short *scenario description* for each topic.
2. After the model finishes, review the generated inputs. You may edit any that look off.
#### 7.4 Generate Corresponding Outputs
1. Click **`Save All Model Outputs`**. Kiln now runs the model a second time—this time using each generated input as the prompt—to produce the *output* (the ATT&CK technique label).
<figure style="text-align:center;">
<img src="https://i.imgur.com/A47GRVr.png" width="800"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Produce the "output" side and store the pair.</figcaption>
</figure>
2. The full input-output pairs are automatically added to the project's dataset.
#### 7.5 Review the Completed Dataset
1. Switch to the **`Dataset`** tab.
2. You should see a table of 64 (8 topics × 8 samples) pairs. Clicking any row opens the same **Run** view, where you can **rate**, **repair**, or **delete** the pair.
<figure style="text-align:center;">
<img src="https://i.imgur.com/DnyXYJO.png" width="800"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Dataset overview with generated pairs.</figcaption>
</figure>
---
### 8. Dataset Export (Create a Fine-Tune)
1. Once you are satisfied with the dataset, you can export it to several JSONL formats via the **Fine Tune → Create a Fine Tune** button.
2. Kiln will first ask what format we'd like our data exported in. We can leave the default setting of *Download: OpenAI chat format (JSONL)*. Next, select *Create a New Fine-Tuning Dataset*.
3. Kiln supports splitting our generated data into a number of buckets, including *`Training`*, *`Test`*, and *`Validation`*. Each of these dataset segments is critical to a great fine-tune, but with only 64 generated examples, we don't have the luxury of creating a split. As such, under **`Advanced Options`**, select *100% training* and click *Create Dataset*.
<figure style="text-align:center;">
<img src="https://i.imgur.com/vp6jobS.png" width="400"
style="display:block; margin-left:auto; margin-right:auto; border:5px solid black;">
<figcaption>Dataset split options.</figcaption>
</figure>
4. We can ignore all further options and select *Download Split*. A new `.jsonl` file will be saved!
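For reference, each line in the exported file is a self-contained JSON object holding a `messages` array. A minimal sketch of what one record might look like for our task (the exact contents will vary with your generated data):
```python
# Minimal sketch: one OpenAI chat-format record, written as a JSONL line.
import json

record = {
    "messages": [
        {"role": "system", "content": "Given a description of an attack technique, "
         "tactic, or procedure, return only a MITRE ATT&CK ID and Name."},
        {"role": "user", "content": "An attacker dumps LSASS memory using Mimikatz."},
        {"role": "assistant", "content": "T1003.001 - LSASS Memory"},
    ]
}
with open("mitre_train.jsonl", "a") as f:
    f.write(json.dumps(record) + "\n")
```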
---
## Objective 3: Fine Tuning with LLaMA Factory
There are many popular options for performing fine-tunes, although each has its drawbacks:
* [Unsloth](https://unsloth.ai) is the most popular solution, but currently does not support multi-GPU setups without a commercial license.
* [Axolotl](https://axolotl.ai) does support multi-GPU setups, but often lags behind Unsloth in features and capability.
* Both of these options are also CLI-only. While not the end of the world, it does mean we'd need to learn how these tools work before being productive with them.
While I encourage you to explore both of these tools, they are unfortunately out of scope for this lab. Instead, we're going to use a project that tries to make these tools easier to use: [LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory). To do so, we'll need to perform some additional setup of our lab environment.
### Explore: Touring LLaMA Factory
Although LLaMA Factory does its best to simplify the fine-tuning process, there are still many dials and knobs to turn! Let's take a brief tour of the most important options:
1. Model Selection - This area allows us to select any model we're interested in fine-tuning. LLaMA Factory will handle downloading the FP16 version of the model from **HuggingFace** for us. Note that while you can fine-tune an already quantized model, you'll often obtain a better result (as measured by perplexity) by starting with the "raw" model.
2. Quantization Selection - Without much better hardware, we will usually be training **LoRA**s (Low-Rank Adapters). These slightly nudge the parameters of the model in the direction we're interested in. If we need additional headroom, we can instead **quantize the base model** (e.g., reduce its precision from 16-bit to 4-bit) and then apply **LoRA** to the quantized model, producing a **QLoRA** (Quantized LoRA). This approach combines the memory savings of quantization with the parameter-efficiency of LoRA; a minimal sketch of the idea appears after the figure below.
3. Dataset Selection - This is where we can utilize our custom-made dataset. Unfortunately, adding new datasets to LLaMA Factory is a rather manual effort; this lab has already pre-loaded our dataset for us.
4. Train Settings - This is where we configure exactly how our model will be trained. The majority of these settings can stay at their defaults, until you have a specific need that pushes you down the rabbit hole. In particular, we'll be interested in:
* **Learning Rate** - Controls how large an adjustment to the model's weights are made during each step
* **Epochs** - Determines the number of times the training algorithm will iterate over the entire dataset (i.e., by default, training repeats 3 times). Critical for avoiding under- or over-fitting.
* **Cutoff length** - Equivalent to Ollama's context. As always, larger context training requires more memory.
* **Batch Size** - Can speed up training, as long as we have the hardware to support it.
* **Warmup Steps** - The number of initial training steps during which the learning rate gradually increases to the set target. Helps with stability.
<figure style="text-align: center;">
<img
src="https://i.imgur.com/zbQ17cp.png"
width="800"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<figcaption style="margin-top: 8px; font-size: 1.1em; ">
Fine Tuning Settings
</figcaption>
</figure>
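As promised above, here is a minimal sketch of the LoRA/QLoRA idea using the `peft` and `transformers` packages directly; LLaMA Factory wires up the equivalent configuration for us. The model ID, rank, and target modules are illustrative assumptions, not the exact values LLaMA Factory uses.
```python
# Minimal sketch: a 4-bit quantized base model plus a small LoRA adapter (QLoRA).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(      # quantize the frozen base model to 4-bit
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
base = AutoModelForCausalLM.from_pretrained(
    "google/gemma-3-1b-it",           # assumed small text-only checkpoint
    quantization_config=bnb_config,
)
lora = LoraConfig(                    # small trainable low-rank matrices
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(base, lora)
model.print_trainable_parameters()    # typically well under 1% of all weights
```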
### Execute: LLaMA Factory Fine-Tuning
Set the following before we start to fine tune Gemma:
1. **Model**: `Gemma-3-4B`
2. **Chat template**: `Gemma3`
3. **Learning Rate**: `5e-6`
4. **Dataset**: `mitre`
5. **Warmup Steps**: `100`
* Scroll to the bottom of the page and click `Preview command`. The WebUI is merely a front end for constructing `llamafactory-cli` commands, and this shows exactly what will be run.
* When done reviewing, click `Start`. It will take some time for LLaMA Factory to begin, as it first needs to download the full `FP16` raw `Gemma-3-4B` model files.
<figure style="text-align: center;">
<img
src="https://i.imgur.com/r7dfG2k.png"
width="600"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<figcaption style="margin-top: 8px; font-size: 1.1em; ">
LLaMa Factory CLI Generated Command & Start
</figcaption>
</figure>
**Monitor the loss graph.** The graph measures **loss** per **training step** (roughly 8k steps: ~2.5k examples × 3 epochs), or put simply, how different the model's predicted answer is from our data. This should gradually, logarithmically slope downwards if training is working.
#### What to Look for in the Loss Curve
- **Steady decline** → model is learning.
- **Rapid flattening early** → learning rate may be too low or the model is under-parameterized.
- **Very flat near the end** → possible overfitting; consider reducing the number of epochs or adding regularization.
If the curve behaves unexpectedly, you can stop the job, adjust the **learning rate** or **warmup steps**, and restart from the latest checkpoint.
<div style="
display: flex;
justify-content: center;
align-items: flex-start;
gap: 32px;
width: 100%;
max-width: 1200px;
margin: 0 auto;
padding: 10px;
box-sizing: border-box;
">
<div style="text-align: center; flex: 0 0 auto;">
<img
src="https://i.imgur.com/4n6G3Db.png"
width="700px"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<div style="margin-top: 8px; font-size: 1.1em; text-align: center;">
LLaMa Factory Fine Tuning View
</div>
</div>
<div style="text-align: center; flex: 0 0 auto;">
<img
src="https://i.imgur.com/9NYEjpA.png"
width="400px"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<div style="margin-top: 8px; font-size: 1.1em; text-align: center;">
Loss Curve Up Close
</div>
</div>
</div>
Once training completes, we can scroll back up and:
1. Select `Chat`
2. Select our newly trained **LoRA** checkpoint. The name of this checkpoint will match the date you performed the lab.
3. Click `Load Model`
<figure style="text-align: center;">
<img
src="https://i.imgur.com/Z2Hpa2S.png"
width="600"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<figcaption style="margin-top: 8px; font-size: 1.1em; ">
Load Model for Chat
</figcaption>
</figure>
Scrolling down will show all the options for interacting with the model, as we'd expect in most other interfaces. We have options for changing inference parameters, such as Top-P or Temperature, as well as a place to input our system prompt. To test the model's accuracy with our fine-tune, we'd normally want these values to match the desired end-state values as closely as possible, but here we're only going to set the system prompt, as it is the most critical for our fine-tune.
Set the system prompt to the one we selected when using **Kiln.ai** - "Given a description of an attack technique, tactic, or procedure, the model should return only a MITRE ATTACK ID and Name."
| Test Prompt | Expected Output Format |
|------------|------------------------|
| "A malicious actor uses PowerShell to download a file from a remote server." | `T1059.001 PowerShell` |
| "The adversary exfiltrates data via a compressed archive sent over HTTP." | `T1567.001 Exfiltration Over Web Services` |
| "Credential dumping is performed using Mimikatz." | `T1003.001 LSASS Memory` |
<figure style="text-align: center;">
<img
src="https://i.imgur.com/ArMfy4j.png"
width="600"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<figcaption style="margin-top: 8px; font-size: 1.1em; ">
Test prompt
</figcaption>
</figure>
If we're happy with our final model, lastly we can export the model for easy import into Ollama.
### Export the Fine-Tuned Model
1. Switch to the **Export** tab.
2. Choose a directory on your local machine (or a mounted drive) where you want the exported files to live.
3. Select one of the following output formats:
- **FP16 Safetensors** - a high-quality checkpoint you can load again with LLaMA Factory or HuggingFace.
- **GGUF (4-bit)** - a compact file ready for import into **Ollama** or other GGUF-compatible runtimes.
<div style="
display: flex;
justify-content: center;
align-items: flex-start;
gap: 32px;
width: 100%;
max-width: 1200px;
margin: 0 auto;
padding: 10px;
box-sizing: border-box;
">
<div style="text-align: center; flex: 0 0 auto;">
<img
src="https://i.imgur.com/7rAbX33.png"
width="700px"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<div style="margin-top: 8px; font-size: 1.1em; text-align: center;">
Export Model
</div>
</div>
<div style="text-align: center; flex: 0 0 auto;">
<img
src="https://i.imgur.com/5GBXu0i.png"
width="400px"
style="display: block; margin-left: auto; margin-right: auto; border: 5px solid black;">
<div style="margin-top: 8px; font-size: 1.1em; text-align: center;">
Local File Location
</div>
</div>
</div>
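If you exported a GGUF, a minimal sketch of importing it into Ollama follows; the file name, model name, and system prompt are assumptions based on this lab:
```python
# Minimal sketch: write a Modelfile pointing at the exported GGUF, then
# register it with Ollama via `ollama create`.
import pathlib
import subprocess

pathlib.Path("Modelfile").write_text(
    'FROM ./gemma-3-4b-mitre.gguf\n'
    'SYSTEM """Given a description of an attack technique, tactic, or '
    'procedure, return only a MITRE ATT&CK ID and Name."""\n'
)
subprocess.run(["ollama", "create", "gemma-mitre", "-f", "Modelfile"], check=True)
```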
<br>
---
## Conclusion
In this lab, we completed a full fine-tuning workflow:
1. **Dataset Generation** - We explored public datasets on HuggingFace and used Kiln AI to generate a synthetic dataset for MITRE ATT&CK classification.
2. **Fine Tuning** - We used LLaMA Factory to fine-tune Gemma-3-4B on our generated dataset.
3. **Validation & Export** - We tested the model with sample prompts and exported the fine-tuned model in both FP16 and GGUF formats.
If all has gone well, the model should now be much more accurate at identifying MITRE ATT&CK codes from user-input scenarios. If not, additional experimentation may be necessary to produce a good fine-tune: playing with the parameters we've discussed, improving and expanding our dataset, or even fine-tuning a larger or better base model can all improve our success rate.
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+6
View File
@@ -0,0 +1,6 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
/// <reference path="./.next/types/routes.d.ts" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
+4
View File
@@ -0,0 +1,4 @@
/** @type {import("next").NextConfig} */
const config = {};
export default config;
+6913
View File
File diff suppressed because it is too large Load Diff
+35
View File
@@ -0,0 +1,35 @@
{
"name": "notebook-conversion-site",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint",
"typecheck": "tsc --noEmit",
"check": "npm run lint && npm run typecheck",
"format:check": "prettier --check \"**/*.{ts,tsx,js,jsx,md,mdx}\" --cache",
"format:write": "prettier --write \"**/*.{ts,tsx,js,jsx,md,mdx}\" --cache"
},
"dependencies": {
"gray-matter": "^4.0.3",
"micromark": "^4.0.2",
"next": "^15.0.1",
"react": "^18.3.1",
"react-dom": "^18.3.1"
},
"devDependencies": {
"@types/node": "^20.14.10",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"eslint": "^8.57.1",
"eslint-config-next": "^15.0.1",
"postcss": "^8.4.39",
"prettier": "^3.3.2",
"prettier-plugin-tailwindcss": "^0.6.5",
"tailwindcss": "^3.4.3",
"typescript": "^5.5.3"
}
}
+5
View File
@@ -0,0 +1,5 @@
export default {
plugins: {
tailwindcss: {},
},
};
+4
View File
@@ -0,0 +1,4 @@
/** @type {import('prettier').Config & import('prettier-plugin-tailwindcss').PluginOptions} */
export default {
plugins: ["prettier-plugin-tailwindcss"],
};
+526
View File
@@ -0,0 +1,526 @@
import { notFound } from "next/navigation";
import { micromark } from "micromark";
import { LabContent } from "~/components/labs/LabContent";
import { getLabDocument, getLabSummaries } from "~/lib/labs";
type ObjectiveStyle = "divider" | "cards" | "rail";
const objectiveStyles = new Set<ObjectiveStyle>(["divider", "cards", "rail"]);
type StepStyle = "underline" | "pills" | "blocks";
const stepStyles = new Set<StepStyle>(["underline", "pills", "blocks"]);
type BreakoutStyle = "none" | "panel" | "workflow" | "command-pills" | "instruction-rails";
const breakoutStyles = new Set<BreakoutStyle>([
"none",
"panel",
"workflow",
"command-pills",
"instruction-rails",
]);
function normalizeObjectiveStyle(style: unknown): ObjectiveStyle | null {
if (typeof style !== "string") return null;
const normalized = style.trim().toLowerCase() as ObjectiveStyle;
return objectiveStyles.has(normalized) ? normalized : null;
}
function normalizeStepStyle(style: unknown): StepStyle | null {
if (typeof style !== "string") return null;
const normalized = style.trim().toLowerCase() as StepStyle;
return stepStyles.has(normalized) ? normalized : null;
}
function normalizeBreakoutStyle(style: unknown): BreakoutStyle | null {
if (typeof style !== "string") return null;
const normalized = style.trim().toLowerCase() as BreakoutStyle;
return breakoutStyles.has(normalized) ? normalized : null;
}
function extractObjectiveStyleDirective(markdown: string) {
const styleDirectivePattern = /<!--\s*objective-style:\s*([a-z-]+)\s*-->/i;
const match = styleDirectivePattern.exec(markdown);
const style = normalizeObjectiveStyle(match?.[1]);
if (!match) {
return { markdown, style };
}
const cleanedMarkdown = markdown.replace(styleDirectivePattern, "").trimStart();
return { markdown: cleanedMarkdown, style };
}
function extractBreakoutStyleDirective(markdown: string) {
const styleDirectivePattern = /<!--\s*breakout-style:\s*([a-z-]+)\s*-->/i;
const match = styleDirectivePattern.exec(markdown);
const style = normalizeBreakoutStyle(match?.[1]);
if (!match) {
return { markdown, style };
}
const cleanedMarkdown = markdown.replace(styleDirectivePattern, "").trimStart();
return { markdown: cleanedMarkdown, style };
}
function extractStepStyleDirective(markdown: string) {
const styleDirectivePattern = /<!--\s*step-style:\s*([a-z-]+)\s*-->/i;
const match = styleDirectivePattern.exec(markdown);
const style = normalizeStepStyle(match?.[1]);
if (!match) {
return { markdown, style };
}
const cleanedMarkdown = markdown.replace(styleDirectivePattern, "").trimStart();
return { markdown: cleanedMarkdown, style };
}
function extractPlainText(htmlText: string) {
return htmlText
.replace(/<[^>]+>/g, "")
.replace(/&nbsp;/g, " ")
.trim();
}
function isObjectiveHeading(headingHtml: string) {
const plainText = extractPlainText(headingHtml);
return /^objective\b/i.test(plainText);
}
function stripObjectiveDividers(html: string) {
return html.replace(
/<hr\s*\/?>\s*(?=<h2(?:\s+[^>]*)?>\s*Objective\b)/gi,
"",
);
}
function transformOutsideDetails(
html: string,
transform: (safeHtml: string) => string,
) {
const detailsPattern = /<details(?:\s+[^>]*)?>[\s\S]*?<\/details>/gi;
const detailsBlocks: string[] = [];
const maskedHtml = html.replace(detailsPattern, (detailsBlock) => {
const token = `__DETAILS_BLOCK_${detailsBlocks.length}__`;
detailsBlocks.push(detailsBlock);
return token;
});
const transformedHtml = transform(maskedHtml);
return transformedHtml.replace(/__DETAILS_BLOCK_(\d+)__/g, (_, indexText: string) => {
const index = Number(indexText);
return detailsBlocks[index] ?? "";
});
}
function segmentObjectiveSections(html: string) {
const headingPattern = /<h2(?:\s+[^>]*)?>([\s\S]*?)<\/h2>/gi;
const headings: Array<{ index: number; text: string }> = [];
let match = headingPattern.exec(html);
while (match) {
headings.push({ index: match.index, text: match[1] ?? "" });
match = headingPattern.exec(html);
}
if (headings.length === 0) return html;
let output = "";
let cursor = 0;
for (let i = 0; i < headings.length; i++) {
const heading = headings[i];
if (!isObjectiveHeading(heading.text)) continue;
const nextObjectiveHeading = headings.slice(i + 1).find((nextHeading) => {
return isObjectiveHeading(nextHeading.text);
});
const nextHeadingIndex = nextObjectiveHeading?.index ?? html.length;
output += html.slice(cursor, heading.index);
output += '<section class="objective-segment">';
output += html.slice(heading.index, nextHeadingIndex);
output += "</section>";
cursor = nextHeadingIndex;
}
output += html.slice(cursor);
return output;
}
function isStepHeading(headingHtml: string) {
const plainText = extractPlainText(headingHtml);
if (!plainText) return false;
return (
/^step\b/i.test(plainText) ||
/^\d+(?:\.\d+)*(?:[.):])?\s+/.test(plainText) ||
/\b(explore|execute|checkpoint|review)\b/i.test(plainText)
);
}
function getStepMode(headingHtml: string) {
const plainText = extractPlainText(headingHtml).toLowerCase();
if (plainText.includes("execute")) return "execute";
if (plainText.includes("explore")) return "explore";
if (plainText.includes("checkpoint")) return "checkpoint";
if (plainText.includes("review")) return "review";
return null;
}
function looksLikeCommandBlock(codeHtml: string) {
const codeText = codeHtml
.replace(/<[^>]+>/g, "")
.replace(/&lt;/g, "<")
.replace(/&gt;/g, ">")
.replace(/&amp;/g, "&")
.replace(/&#39;/g, "'")
.replace(/&quot;/g, '"');
return (
/(^|\n)\s*(\$|sudo\s|git\s|python3?\s|pip\s|npm\s|pnpm\s|yarn\s|llama-|ollama\s|curl\s|wget\s|apt\s|cd\s|ls\s|cat\s|cp\s|mv\s|chmod\s|make\s)/i.test(
codeText,
) || /--[a-z0-9-]+/i.test(codeText)
);
}
function commandLinesToHtml(codeHtml: string) {
const lines = codeHtml
.split("\n")
.map((line) => line.trimEnd())
.filter((line) => line.length > 0);
if (lines.length === 0) {
return `<span class="cmd-pill">${codeHtml}</span>`;
}
return lines.map((line) => `<span class="cmd-pill">${line}</span>`).join("\n");
}
function markExplicitInstructionElements(
html: string,
options?: {
commandPills?: boolean;
},
) {
const renderCommandPills = options?.commandPills ?? false;
const imperativeLead =
/^(?:\d+\.\s*)?(?:open|go to|navigate to|click|select|run|enter|type|copy|paste|create|clone|convert|inspect|execute|use|download|install|review|confirm|kill|rerun|quit|start|stop)\b/i;
let markedHtml = html.replace(
/<pre([^>]*)>\s*<code([^>]*)>([\s\S]*?)<\/code>\s*<\/pre>/gi,
(fullMatch, rawPreAttrs: string, rawCodeAttrs: string, codeHtml: string) => {
if (!looksLikeCommandBlock(codeHtml)) return fullMatch;
const preAttrs = addClassAttribute(rawPreAttrs.trim(), "explicit-command-block");
const codeAttrs = addClassAttribute(rawCodeAttrs.trim(), "explicit-command");
const commandContent = renderCommandPills ? commandLinesToHtml(codeHtml) : codeHtml;
return `<pre${preAttrs ? ` ${preAttrs}` : ""}><code${codeAttrs ? ` ${codeAttrs}` : ""}>${commandContent}</code></pre>`;
},
);
markedHtml = markedHtml.replace(
/<(p|li)([^>]*)>([\s\S]*?)<\/\1>/gi,
(fullMatch, tagName: string, rawAttrs: string, innerHtml: string) => {
const plainText = extractPlainText(innerHtml);
if (!imperativeLead.test(plainText)) return fullMatch;
const attrs = addClassAttribute(rawAttrs.trim(), "explicit-instruction");
return `<${tagName}${attrs ? ` ${attrs}` : ""}>${innerHtml}</${tagName}>`;
},
);
return markedHtml;
}
function classifyStepKind(sectionHtml: string, mode: string | null) {
const hasExplicitInstruction = /class="[^"]*\bexplicit-instruction\b[^"]*"/i.test(sectionHtml);
const hasCommandBlock = /class="[^"]*\bexplicit-command-block\b[^"]*"/i.test(sectionHtml);
const hasOrderedList = /<ol>/i.test(sectionHtml);
if (mode === "execute" || mode === "checkpoint") return "instruction";
if (mode === "explore" || mode === "review") {
return hasCommandBlock ? "instruction" : "explanation";
}
if (hasCommandBlock || hasExplicitInstruction) return "instruction";
if (hasOrderedList) return "mixed";
return "explanation";
}
function findObjectiveSegmentOpenStart(html: string, headingIndex: number) {
const lookbackStart = Math.max(0, headingIndex - 120);
const beforeHeading = html.slice(lookbackStart, headingIndex);
const openTagMatch = /<section class="objective-segment">\s*$/i.exec(beforeHeading);
if (!openTagMatch) return headingIndex;
return headingIndex - openTagMatch[0].length;
}
function segmentStepSections(html: string) {
const headingPattern = /<h([2-4])([^>]*)>([\s\S]*?)<\/h\1>/gi;
const headings: Array<{
index: number;
level: number;
attrs: string;
text: string;
isStep: boolean;
}> = [];
let match = headingPattern.exec(html);
while (match) {
const level = Number(match[1] ?? "0");
const attrs = match[2] ?? "";
const text = match[3] ?? "";
const hasStepClass = /\bclass\s*=\s*"[^"]*\blab-step-title\b/i.test(attrs);
const isObjectiveLevelTwo = level === 2 && isObjectiveHeading(text);
const supportsStepSegmentation = level >= 3 || (level === 2 && !isObjectiveLevelTwo);
headings.push({
index: match.index,
level,
attrs,
text,
isStep: supportsStepSegmentation && (hasStepClass || isStepHeading(text)),
});
match = headingPattern.exec(html);
}
if (headings.length === 0) return html;
let output = "";
let cursor = 0;
for (let i = 0; i < headings.length; i++) {
const heading = headings[i];
if (!heading.isStep) continue;
let nextIndex = html.length;
for (let j = i + 1; j < headings.length; j++) {
const nextHeading = headings[j];
if (nextHeading.level === 2 || nextHeading.isStep) {
nextIndex = findObjectiveSegmentOpenStart(html, nextHeading.index);
break;
}
}
const sectionHtml = html.slice(heading.index, nextIndex);
const modeFromAttrs = /data-step-mode\s*=\s*"([^"]+)"/i.exec(heading.attrs)?.[1] ?? null;
const mode = modeFromAttrs ?? getStepMode(heading.text);
const kind = classifyStepKind(sectionHtml, mode);
const modeAttribute = mode ? ` data-step-mode="${mode}"` : "";
output += html.slice(cursor, heading.index);
output += `<section class="step-segment" data-step-kind="${kind}"${modeAttribute}>${sectionHtml}</section>`;
cursor = nextIndex;
}
output += html.slice(cursor);
return output;
}
function addClassAttribute(attrs: string, className: string) {
const classPattern = /class\s*=\s*"([^"]*)"/i;
if (classPattern.test(attrs)) {
return attrs.replace(classPattern, (_, classList: string) => {
const classes = classList.split(/\s+/).filter(Boolean);
if (!classes.includes(className)) classes.push(className);
return `class="${classes.join(" ")}"`;
});
}
return `${attrs} class="${className}"`.trim();
}
function addDataAttribute(attrs: string, name: string, value: string) {
const attrPattern = new RegExp(`\\b${name}\\s*=`, "i");
if (attrPattern.test(attrs)) return attrs;
return `${attrs} ${name}="${value}"`.trim();
}
function stripStepOrdinalPrefix(headingHtml: string) {
return headingHtml.replace(/^(\s*)(?:\d+(?:\.\d+)*(?:[.):])?\s+)/, "$1");
}
function annotateStepHeadings(
html: string,
options?: {
stripOrdinals?: boolean;
},
) {
const stripOrdinals = options?.stripOrdinals ?? false;
return html.replace(
/<h([2-4])([^>]*)>([\s\S]*?)<\/h\1>/gi,
(fullMatch, level: string, rawAttrs: string, headingHtml: string) => {
const numericLevel = Number(level);
if (numericLevel === 2 && isObjectiveHeading(headingHtml)) return fullMatch;
if (!isStepHeading(headingHtml)) return fullMatch;
let attrs = rawAttrs.trim();
attrs = addClassAttribute(attrs, "lab-step-title");
const mode = getStepMode(headingHtml);
if (mode) {
attrs = addDataAttribute(attrs, "data-step-mode", mode);
}
const displayHeadingHtml = stripOrdinals
? stripStepOrdinalPrefix(headingHtml)
: headingHtml;
return `<h${level}${attrs ? ` ${attrs}` : ""}>${displayHeadingHtml}</h${level}>`;
},
);
}
function splitTableRow(line: string) {
let row = line.trim();
if (row.startsWith("|")) row = row.slice(1);
if (row.endsWith("|")) row = row.slice(0, -1);
return row.split("|").map((cell) => cell.trim());
}
function isTableDivider(line: string) {
const cells = splitTableRow(line);
return cells.length > 0 && cells.every((cell) => /^:?-{3,}:?$/.test(cell));
}
function isTableRow(line: string) {
if (!line.includes("|")) return false;
return splitTableRow(line).length > 1;
}
function tableAlignment(dividerCell: string) {
if (/^:-+:$/.test(dividerCell)) return ' style="text-align:center;"';
if (/^-+:$/.test(dividerCell)) return ' style="text-align:right;"';
return "";
}
function renderInlineMarkdown(markdown: string) {
const html = micromark(markdown, { allowDangerousHtml: false }).trim();
if (html.startsWith("<p>") && html.endsWith("</p>")) {
return html.slice(3, -4);
}
return html;
}
function convertGfmTables(markdown: string) {
const lines = markdown.split("\n");
const out: string[] = [];
let inFence = false;
for (let i = 0; i < lines.length; i++) {
const line = lines[i] ?? "";
const fenceMatch = /^(```|~~~)/.exec(line.trim());
if (fenceMatch) {
inFence = !inFence;
out.push(line);
continue;
}
if (inFence) {
out.push(line);
continue;
}
const next = lines[i + 1];
if (next && isTableRow(line) && isTableDivider(next)) {
const header = splitTableRow(line);
const divider = splitTableRow(next);
const rows: string[][] = [];
let j = i + 2;
while (j < lines.length && isTableRow(lines[j] ?? "") && !isTableDivider(lines[j] ?? "")) {
rows.push(splitTableRow(lines[j]!));
j++;
}
const alignAttrs = header.map((_, idx) => tableAlignment(divider[idx] ?? ""));
const headHtml = header
.map((cell, idx) => `<th${alignAttrs[idx] ?? ""}>${renderInlineMarkdown(cell)}</th>`)
.join("");
const bodyHtml = rows
.map((row) => {
const cells = header.map((_, idx) => renderInlineMarkdown(row[idx] ?? ""));
return `<tr>${cells.map((cell, idx) => `<td${alignAttrs[idx] ?? ""}>${cell}</td>`).join("")}</tr>`;
})
.join("");
out.push("<table>");
out.push(`<thead><tr>${headHtml}</tr></thead>`);
out.push(`<tbody>${bodyHtml}</tbody>`);
out.push("</table>");
i = j - 1;
continue;
}
out.push(line);
}
return out.join("\n");
}
function markdownToHtml(markdown: string) {
return micromark(convertGfmTables(markdown), { allowDangerousHtml: true });
}
export async function generateStaticParams() {
return getLabSummaries().map((lab) => ({ slug: lab.slug }));
}
export default async function LabPage({
params,
}: {
params: Promise<{ slug: string }>;
}) {
const { slug } = await params;
const lab = getLabDocument(slug);
if (!lab) {
return notFound();
}
const { content, data } = lab;
const { markdown: markdownWithoutObjectiveDirective, style: objectiveDirectiveStyle } =
extractObjectiveStyleDirective(content);
const { markdown: markdownWithoutStepDirective, style: stepDirectiveStyle } =
extractStepStyleDirective(markdownWithoutObjectiveDirective);
const { markdown, style: breakoutDirectiveStyle } = extractBreakoutStyleDirective(
markdownWithoutStepDirective,
);
const styleConfig = data as {
objectiveStyle?: unknown;
stepStyle?: unknown;
breakoutStyle?: unknown;
};
const objectiveStyle =
normalizeObjectiveStyle(styleConfig.objectiveStyle) ?? objectiveDirectiveStyle ?? "divider";
const stepStyle = normalizeStepStyle(styleConfig.stepStyle) ?? stepDirectiveStyle ?? "underline";
const breakoutStyle =
normalizeBreakoutStyle(styleConfig.breakoutStyle) ?? breakoutDirectiveStyle ?? "none";
const objectiveSegmentedHtml = segmentObjectiveSections(
stripObjectiveDividers(markdownToHtml(markdown)),
);
const baseHtml = transformOutsideDetails(objectiveSegmentedHtml, (safeHtml) =>
annotateStepHeadings(safeHtml, {
stripOrdinals: breakoutStyle === "instruction-rails",
}),
);
const htmlContent =
breakoutStyle === "none"
? baseHtml
: transformOutsideDetails(baseHtml, (safeHtml) =>
segmentStepSections(markExplicitInstructionElements(safeHtml, {
commandPills: breakoutStyle === "command-pills",
})),
);
return (
<main className="mx-auto w-full max-w-5xl px-6 py-10">
<div className="mx-auto max-w-4xl rounded-lg border border-[#f6d5a5] bg-white p-6 md:p-8">
<h1 className="mb-6 text-3xl font-bold text-[#004E78]">{lab.title}</h1>
<LabContent
className={`lab-content max-w-none objective-style-${objectiveStyle} step-style-${stepStyle} breakout-style-${breakoutStyle}`}
html={htmlContent}
/>
</div>
</main>
);
}
+29
View File
@@ -0,0 +1,29 @@
import Link from "next/link";
import { getLabSummaries } from "~/lib/labs";
export default function LabsIndex() {
const labs = getLabSummaries();
return (
<main className="mx-auto w-full max-w-5xl px-6 py-10">
<div className="mb-6">
<h1 className="text-3xl font-bold text-[#004E78]">Open Security Labs</h1>
<p className="mt-2 text-slate-600">Browse converted markdown and MDX lab content.</p>
</div>
<div className="grid gap-4">
{labs.map((lab) => (
<Link
key={lab.slug}
href={`/labs/${lab.slug}`}
className="block rounded-lg border border-slate-200 bg-white p-6 transition hover:border-[#F89C27] hover:shadow-sm"
>
<h2 className="text-xl font-semibold text-[#004E78]">{lab.title}</h2>
{lab.description ? <p className="mt-2 text-slate-600">{lab.description}</p> : null}
</Link>
))}
</div>
</main>
);
}
+23
View File
@@ -0,0 +1,23 @@
import "~/styles/globals.css";
import type { Metadata } from "next";
import type { ReactNode } from "react";
import { SiteHeader } from "~/components/SiteHeader";
export const metadata: Metadata = {
title: "Open Security Labs",
description: "Open Security lab content and notebook conversions",
icons: [{ rel: "icon", url: "/logo.png" }],
};
export default function RootLayout({ children }: { children: ReactNode }) {
return (
<html lang="en">
<body className="bg-white text-slate-900">
<SiteHeader />
{children}
</body>
</html>
);
}
+56
View File
@@ -0,0 +1,56 @@
import Link from "next/link";
import { getLabSummaries } from "~/lib/labs";
export default function HomePage() {
const labs = getLabSummaries();
return (
<main className="mx-auto w-full max-w-5xl px-6 py-10">
<section className="rounded-xl border border-[#f6d5a5] bg-white p-8 shadow-sm">
<h1 className="mb-3 text-3xl font-bold text-[#004E78]">
Open Security Labs
</h1>
<p className="max-w-3xl text-slate-700">
Markdown-first lab workspace for Open Security notebook conversions.
</p>
<div className="mt-6 flex flex-wrap gap-3">
<Link
href="/labs"
className="inline-flex items-center rounded-md bg-[#004E78] px-4 py-2 text-sm font-semibold text-white hover:bg-[#003a5a]"
>
Browse all labs
</Link>
<Link
href="https://discord.gg/Ma9UZNBxvh"
className="inline-flex items-center rounded-md border border-[#F89C27] px-4 py-2 text-sm font-semibold text-[#004E78] hover:bg-[#F89C27] hover:text-white"
>
Open Security Discord
</Link>
</div>
</section>
<section className="mt-8">
<h2 className="mb-4 text-xl font-semibold text-[#004E78]">
Recent Labs
</h2>
<div className="grid gap-4 md:grid-cols-2">
{labs.slice(0, 6).map((lab) => (
<Link
key={lab.slug}
href={`/labs/${lab.slug}`}
className="block rounded-lg border border-slate-200 bg-white p-5 transition hover:border-[#F89C27] hover:shadow-sm"
>
<h3 className="text-lg font-semibold text-[#004E78]">
{lab.title}
</h3>
{lab.description ? (
<p className="mt-2 text-sm text-slate-600">{lab.description}</p>
) : null}
</Link>
))}
</div>
</section>
</main>
);
}
+29
View File
@@ -0,0 +1,29 @@
import Image from "next/image";
import Link from "next/link";
export function SiteHeader() {
return (
<header className="sticky top-0 z-20 border-b border-[#f8c27a] bg-white/95 shadow-sm backdrop-blur">
<div className="mx-auto flex w-full max-w-5xl items-center justify-between px-6 py-3">
<Link href="/" className="flex items-center" aria-label="Open Security home">
<Image src="/logo-full.png" alt="Open Security" width={150} height={40} priority />
</Link>
<nav className="flex items-center gap-5 text-sm font-semibold text-[#004E78]">
<Link href="/" className="hover:text-[#F89C27]">
Home
</Link>
<Link href="/labs" className="hover:text-[#F89C27]">
Labs
</Link>
<Link
href="https://discord.gg/Ma9UZNBxvh"
className="rounded-md border border-[#F89C27] px-3 py-1.5 text-[#004E78] hover:bg-[#F89C27] hover:text-white"
>
Discord
</Link>
</nav>
</div>
</header>
);
}
+241
View File
@@ -0,0 +1,241 @@
"use client";
import { useEffect, useRef, useState } from "react";
type LabContentProps = {
className: string;
html: string;
};
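// Heuristics used below: blocks that look like shell commands get CLI styling
// plus a copy button; long prose-like blocks get "prompt card" styling.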
const cliLanguagePattern = /\b(language-(bash|sh|shell|zsh|console|terminal)|bash|shell|zsh)\b/i;
const cliCommandPattern =
/(^|\n)\s*(\$|sudo\s|git\s|python3?\s|pip\s|npm\s|pnpm\s|yarn\s|llama-|ollama\s|curl\s|wget\s|apt\s|cd\s|ls\s|cat\s|cp\s|mv\s|chmod\s|make\s)/i;
const promptLanguagePattern = /\b(language-(text|plaintext|md|markdown)|text|plaintext|markdown)\b/i;
const promptSignalPattern =
/\b(you are|guidelines|follow these|example|when provided|system prompt|tasked with)\b/i;
type ParsedSetting = {
key: string;
value: string;
};
type ZoomedImageState = {
src: string;
alt: string;
};
function looksLikeCliCommand(commandText: string, className: string) {
if (cliLanguagePattern.test(className)) return true;
return cliCommandPattern.test(commandText) || /--[a-z0-9-]+/i.test(commandText);
}
function looksLikePromptTextBlock(text: string, className: string) {
if (looksLikeCliCommand(text, className)) return false;
const normalizedText = text.trim();
if (!normalizedText) return false;
const lineCount = normalizedText.split("\n").length;
if (promptLanguagePattern.test(className) && normalizedText.length > 80) return true;
if (lineCount >= 4 && promptSignalPattern.test(normalizedText)) return true;
if (lineCount >= 6 && /(^|\n)\s*[*-]\s+/.test(normalizedText)) return true;
return false;
}
function escapeRegex(value: string) {
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
function escapeHtml(value: string) {
return value
.replaceAll("&", "&amp;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;")
.replaceAll('"', "&quot;")
.replaceAll("'", "&#39;");
}
function parseSettingListItem(item: HTMLLIElement): ParsedSetting | null {
const keyElement = item.querySelector("code");
if (!keyElement) return null;
const key = (keyElement.textContent ?? "").replace(/\s+/g, " ").trim();
if (!key || key.length > 40) return null;
const text = (item.textContent ?? "").replace(/\s+/g, " ").trim();
const match = new RegExp(`^${escapeRegex(key)}\\s*(?:-|–|—|:|=)\\s*(.+)$`).exec(text);
if (!match) return null;
const value = (match[1] ?? "").replace(/\s+/g, " ").trim();
if (!value || value.length > 36) return null;
if (/[.;]/.test(value) && value.length > 16) return null;
return { key, value };
}
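// Example (hypothetical input): an item rendered from "- `temperature` - 0.7"
// parses to { key: "temperature", value: "0.7" }; long or sentence-like values
// are rejected so only compact key/value pairs get the settings styling.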
function enhanceSettingsLists(root: HTMLElement) {
const lists = root.querySelectorAll<HTMLUListElement>("ul");
for (const list of lists) {
if (list.dataset.settingsEnhanced === "true") continue;
const items = Array.from(list.children).filter((node): node is HTMLLIElement => {
return node.tagName === "LI";
});
if (items.length < 2) continue;
const parsedItems = items.map((item) => parseSettingListItem(item));
if (parsedItems.some((parsedItem) => parsedItem === null)) continue;
const settings = parsedItems as ParsedSetting[];
const compactValueCount = settings.filter((setting) => setting.value.length <= 20).length;
if (compactValueCount < Math.max(2, Math.ceil(settings.length * 0.66))) continue;
list.dataset.settingsEnhanced = "true";
list.classList.add("lab-settings-list");
for (let i = 0; i < items.length; i++) {
const item = items[i];
const setting = settings[i];
if (!item || !setting) continue;
item.classList.add("lab-settings-item");
item.innerHTML =
`<span class="lab-setting-key">${escapeHtml(setting.key)}</span>` +
`<span class="lab-setting-value">${escapeHtml(setting.value)}</span>`;
}
}
}
function ensureCopyButton(pre: HTMLPreElement, label: string) {
if (pre.dataset.copyEnhanced === "true") return;
pre.dataset.copyEnhanced = "true";
const copyButton = document.createElement("button");
copyButton.type = "button";
copyButton.className = "lab-copy-button";
copyButton.textContent = label;
copyButton.dataset.defaultLabel = label;
copyButton.setAttribute("aria-label", "Copy block to clipboard");
pre.appendChild(copyButton);
}
export function LabContent({ className, html }: LabContentProps) {
const containerRef = useRef<HTMLElement>(null);
const [zoomedImage, setZoomedImage] = useState<ZoomedImageState | null>(null);
useEffect(() => {
const root = containerRef.current;
if (!root) return;
const preBlocks = root.querySelectorAll<HTMLPreElement>("pre");
for (const pre of preBlocks) {
const code = pre.querySelector<HTMLElement>("code");
if (!code) continue;
const blockText = code.textContent ?? "";
if (looksLikeCliCommand(blockText, code.className)) {
pre.classList.add("lab-cli-shell");
ensureCopyButton(pre, "Copy");
continue;
}
if (looksLikePromptTextBlock(blockText, code.className)) {
pre.classList.add("lab-prompt-card");
ensureCopyButton(pre, "Copy Text");
}
}
enhanceSettingsLists(root);
const handleRootClick = (event: Event) => {
const target = event.target as HTMLElement;
const button = target.closest<HTMLButtonElement>("button.lab-copy-button");
if (button) {
const pre = button.closest("pre");
const code = pre?.querySelector("code");
const commandText = code?.textContent?.trimEnd();
if (!commandText) return;
const defaultLabel = button.dataset.defaultLabel ?? "Copy";
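// Assumes a secure context (https or localhost): navigator.clipboard is
// undefined elsewhere, so this call would throw before the .catch() applies.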
void navigator.clipboard.writeText(commandText).then(() => {
button.textContent = "Copied";
button.classList.add("is-copied");
window.setTimeout(() => {
button.textContent = defaultLabel;
button.classList.remove("is-copied");
}, 1200);
}).catch(() => {
button.textContent = "Failed";
window.setTimeout(() => {
button.textContent = defaultLabel;
}, 1200);
});
return;
}
const image = target.closest<HTMLImageElement>("img");
if (!image || !root.contains(image)) return;
const src = image.getAttribute("src");
if (!src) return;
event.preventDefault();
event.stopPropagation();
setZoomedImage({
src,
alt: image.getAttribute("alt") ?? "",
});
};
root.addEventListener("click", handleRootClick);
return () => {
root.removeEventListener("click", handleRootClick);
};
}, [html]);
useEffect(() => {
if (!zoomedImage) return;
const previousOverflow = document.body.style.overflow;
document.body.style.overflow = "hidden";
const activeElement = document.activeElement;
const previousFocusedElement = activeElement instanceof HTMLElement ? activeElement : null;
const handleEscape = (event: KeyboardEvent) => {
if (event.key === "Escape") {
setZoomedImage(null);
}
};
window.addEventListener("keydown", handleEscape);
return () => {
window.removeEventListener("keydown", handleEscape);
document.body.style.overflow = previousOverflow;
previousFocusedElement?.focus();
};
}, [zoomedImage]);
return (
<>
<article
ref={containerRef}
className={className}
dangerouslySetInnerHTML={{ __html: html }}
/>
{zoomedImage ? (
<div
className="lab-image-modal"
role="presentation"
onClick={() => setZoomedImage(null)}
>
<div className="lab-image-modal__surface" onClick={(event) => event.stopPropagation()}>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img className="lab-image-modal__image" src={zoomedImage.src} alt={zoomedImage.alt} />
</div>
</div>
) : null}
</>
);
}
+100
View File
@@ -0,0 +1,100 @@
import fs from "fs";
import path from "path";
import matter from "gray-matter";
const CONTENT_DIR = path.join(process.cwd(), "content", "labs");
const CONTENT_EXTENSIONS = [".md", ".mdx"] as const;
export type LabSummary = {
slug: string;
title: string;
description: string;
fileName: string;
};
export type LabDocument = LabSummary & {
content: string;
data: Record<string, unknown>;
};
function toTitleCaseFromSlug(slug: string) {
return slug.replace(/-/g, " ").replace(/\b\w/g, (char) => char.toUpperCase());
}
function getSlugFromFileName(fileName: string) {
return fileName.replace(/\.(md|mdx)$/i, "");
}
function hasSupportedExtension(fileName: string) {
return CONTENT_EXTENSIONS.some((ext) => fileName.toLowerCase().endsWith(ext));
}
export function listLabFiles() {
if (!fs.existsSync(CONTENT_DIR)) {
return [];
}
return fs
.readdirSync(CONTENT_DIR)
.filter((fileName) => hasSupportedExtension(fileName))
.sort((a, b) => a.localeCompare(b));
}
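// Lab files may carry YAML frontmatter, e.g. (hypothetical):
//   ---
//   title: Example Lab
//   description: One-line summary shown on index cards.
//   ---
// Missing fields fall back to a title-cased slug and an empty description.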
export function getLabSummaries() {
return listLabFiles().map((fileName) => {
const filePath = path.join(CONTENT_DIR, fileName);
const source = fs.readFileSync(filePath, "utf8");
const { data } = matter(source);
const slug = getSlugFromFileName(fileName);
const title =
typeof data.title === "string" && data.title.trim().length > 0
? data.title
: toTitleCaseFromSlug(slug);
const description =
typeof data.description === "string" && data.description.trim().length > 0
? data.description
: "";
return {
slug,
title,
description,
fileName,
} satisfies LabSummary;
});
}
export function getLabDocument(slug: string): LabDocument | null {
const fileName = listLabFiles().find((candidateFileName) => {
return getSlugFromFileName(candidateFileName) === slug;
});
if (!fileName) {
return null;
}
const filePath = path.join(CONTENT_DIR, fileName);
const source = fs.readFileSync(filePath, "utf8");
const { content, data } = matter(source);
const title =
typeof data.title === "string" && data.title.trim().length > 0
? data.title
: toTitleCaseFromSlug(slug);
const description =
typeof data.description === "string" && data.description.trim().length > 0
? data.description
: "";
return {
slug,
title,
description,
fileName,
content,
data,
};
}
+854
View File
@@ -0,0 +1,854 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Base CSS Element Custom Styles */
h1 {
font-size: 2.25rem;
line-height: 2.5rem;
margin-bottom: 10px;
color: #004E78;
}
h2 {
font-size: 1.875rem;
line-height: 2.25rem;
margin-bottom: 10px;
color: #004E78;
}
h3 {
font-size: 1.5rem;
line-height: 2rem;
margin-bottom: 10px;
color: #004E78;
}
h4 {
font-size: 1.25rem;
line-height: 1.75rem;
margin-bottom: 10px;
color: #004E78;
}
h5 {
font-size: 1.125rem;
line-height: 1.75rem;
margin-bottom: 10px;
color: #004E78;
}
p {
font-size: 1rem;
line-height: 1.5rem;
margin-bottom: 5px;
}
ul {
list-style-type: disc;
margin-left: 20px;
}
ol {
list-style-type: decimal;
margin-left: 20px;
}
/* End Basic Customizations */
@layer base {
:root {
--background: none;
--foreground: 0 0% 3.9%;
--card: 0 0% 100%;
--card-foreground: 0 0% 3.9%;
--popover: 0 0% 100%;
--popover-foreground: 0 0% 3.9%;
/* #F89C27 / 34, 94%, 56% // #004E78 / 201 100% 24% */
--primary: 34 94% 56%;
--primary-foreground: 0 0% 98%;
--secondary: 201 100% 24%;
--secondary-foreground: 0 0% 98%;
--muted: 0 0% 96.1%;
--muted-foreground: 0 0% 45.1%;
--accent: 0 0% 96.1%;
--accent-foreground: 0 0% 9%;
--destructive: 0 84.2% 60.2%;
--destructive-foreground: 0 0% 98%;
--border: 34 94% 56%;
--input: 0 0% 89.8%;
--ring: 34 94% 56%;
/* Keeping original chart colors */
--chart-1: 12 76% 61%;
--chart-2: 173 58% 39%;
--chart-3: 197 37% 24%;
--chart-4: 43 74% 66%;
--chart-5: 27 87% 67%;
--radius: 0.5rem;
}
.dark {
--background: 0 0% 3.9%;
--foreground: 0 0% 98%;
--card: 0 0% 3.9%;
--card-foreground: 0 0% 98%;
--popover: 0 0% 3.9%;
--popover-foreground: 0 0% 98%;
--primary: 0 0% 98%;
--primary-foreground: 0 0% 9%;
--secondary: 0 0% 14.9%;
--secondary-foreground: 0 0% 98%;
--muted: 0 0% 14.9%;
--muted-foreground: 0 0% 63.9%;
--accent: 0 0% 14.9%;
--accent-foreground: 0 0% 98%;
--destructive: 0 62.8% 30.6%;
--destructive-foreground: 0 0% 98%;
--border: 0 0% 14.9%;
--input: 0 0% 14.9%;
--ring: 0 0% 83.1%;
/* Keeping original chart colors */
--chart-1: 220 70% 50%;
--chart-2: 160 60% 45%;
--chart-3: 30 80% 55%;
--chart-4: 280 65% 60%;
--chart-5: 340 75% 55%;
}
}
@layer base {
* {
border-color: #e2e8f0;
}
body {
background-color: #ffffff;
color: #0f172a;
}
}
/* Keyframes for landing page */
@keyframes fadeIn {
0% {
opacity: 0;
}
100% {
opacity: 1;
}
}
@keyframes fadeOut {
0% {
opacity: 1;
}
100% {
opacity: 0;
}
}
.blur {
background: radial-gradient(circle, transparent 100%, black);
mix-blend-mode: multiply;
}
.bg-fiber-carbon {
background:
radial-gradient(black 10%, transparent 1%) 0 0,
radial-gradient(rgba(255, 255, 255, 0.1) 15%, transparent 10%) 8px 19px;
background-color: #fff;
background-size: 36px 36px;
}
.progress {
animation: progress 1s infinite linear;
}
.left-right {
transform-origin: 0% 50%;
}
@keyframes progress {
0% {
transform: translateX(0) scaleX(0);
}
40% {
transform: translateX(0) scaleX(0.4);
}
100% {
transform: translateX(100%) scaleX(0.5);
}
}
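/* Lab content styles: tables, zoomable images, callouts, CLI shells, prompt
cards, settings lists, and the objective/step segment variants */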
.lab-content table {
width: 100%;
margin: 1rem 0;
border-collapse: collapse;
}
.lab-content th,
.lab-content td {
border: 1px solid #d1d5db;
padding: 0.55rem 0.7rem;
vertical-align: top;
}
.lab-content th {
background-color: #f3f4f6;
text-align: left;
}
.lab-content img {
cursor: zoom-in;
}
.lab-image-modal {
position: fixed;
inset: 0;
z-index: 90;
display: flex;
align-items: center;
justify-content: center;
padding: 1.25rem;
background: rgba(75, 85, 99, 0.82);
}
.lab-image-modal__surface {
max-width: 95vw;
max-height: 95vh;
}
.lab-image-modal__image {
display: block;
width: auto;
height: auto;
max-width: 95vw;
max-height: 95vh;
border-radius: 10px;
box-shadow: 0 18px 56px rgba(17, 24, 39, 0.5);
}
.lab-content .lab-callout {
margin: 1rem 0;
padding: 0.75rem 1rem;
border-left: 4px solid;
border-radius: 0.25rem;
}
.lab-content .lab-callout--warning {
border-left-color: #dc2626;
background-color: #fef2f2;
}
.lab-content .lab-callout--info {
border-left-color: #2563eb;
background-color: #eff6ff;
}
.lab-content .lab-callout--checkpoint {
border-left-color: #15803d;
background-color: #f0fdf4;
}
.lab-content pre.lab-cli-shell {
position: relative;
margin: 1rem 0;
padding: 1rem 1rem 0.85rem;
border: 1px solid #c8d9e8;
border-left: 5px solid #004e78;
border-radius: 10px;
background: #f4f9ff;
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.65);
overflow: auto;
}
.lab-content pre.lab-cli-shell::before {
content: "CLI";
position: absolute;
top: 0.42rem;
left: 0.7rem;
font-size: 0.66rem;
font-weight: 700;
letter-spacing: 0.08em;
color: #4a6477;
}
.lab-content pre.lab-cli-shell code {
display: block;
margin-top: 0.45rem;
font-size: 0.9rem;
line-height: 1.38;
}
.lab-content pre.lab-cli-shell .lab-copy-button {
position: absolute;
top: 0.36rem;
right: 0.4rem;
border: 1px solid #c3d4e5;
border-radius: 8px;
background: #ffffff;
color: #294e69;
font-size: 0.74rem;
font-weight: 600;
line-height: 1;
padding: 0.34rem 0.56rem;
cursor: pointer;
}
.lab-content pre.lab-cli-shell .lab-copy-button:hover {
background: #eef5fb;
}
.lab-content pre.lab-cli-shell .lab-copy-button.is-copied {
border-color: #88c09e;
color: #0f5d33;
background: #eaf8ef;
}
.lab-content pre.lab-prompt-card {
position: relative;
margin: 1rem 0;
padding: 1rem 1rem 0.92rem;
border: 1px solid #d7c7a7;
border-left: 5px solid #b77400;
border-radius: 10px;
background: #fffaf2;
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.7);
overflow: auto;
}
.lab-content pre.lab-prompt-card::before {
content: "Card Description";
position: absolute;
top: 0.42rem;
left: 0.7rem;
font-size: 0.66rem;
font-weight: 700;
letter-spacing: 0.08em;
color: #6c4a12;
}
.lab-content pre.lab-prompt-card code {
display: block;
margin-top: 0.82rem;
font-size: 0.9rem;
line-height: 1.4;
white-space: pre-wrap;
}
.lab-content pre.lab-prompt-card .lab-copy-button {
position: absolute;
top: 0.34rem;
right: 0.4rem;
border: 1px solid #9b5a00;
border-radius: 999px;
background: linear-gradient(180deg, #ffd18d, #f3a743);
color: #3d2401;
font-size: 0.72rem;
font-weight: 700;
letter-spacing: 0.04em;
text-transform: uppercase;
line-height: 1;
padding: 0.36rem 0.62rem;
cursor: pointer;
box-shadow: 0 1px 0 rgba(255, 255, 255, 0.35), 0 1px 2px rgba(61, 36, 1, 0.18);
}
.lab-content pre.lab-prompt-card .lab-copy-button:hover {
background: linear-gradient(180deg, #ffdb9f, #f5b459);
}
.lab-content pre.lab-prompt-card .lab-copy-button.is-copied {
border-color: #4f8d5f;
color: #08361a;
background: linear-gradient(180deg, #c8f2d4, #9edcb3);
}
.lab-content ul.lab-settings-list {
list-style: none;
margin: 0.9rem 0 1.2rem;
margin-left: 0;
padding: 0.22rem 0;
border: 1px solid #ccdeec;
border-left: 5px solid #0b72ba;
border-radius: 12px;
background: linear-gradient(180deg, #f9fcff, #f4f9fe);
box-shadow: 0 1px 0 rgba(15, 23, 42, 0.04);
}
.lab-content ul.lab-settings-list > li.lab-settings-item {
margin: 0;
padding: 0.5rem 0.85rem;
border: none;
border-bottom: 1px dashed #d5e3ef;
border-radius: 0;
background: transparent;
display: grid;
grid-template-columns: minmax(0, 1fr) auto;
align-items: center;
gap: 0.75rem;
}
.lab-content ul.lab-settings-list > li.lab-settings-item:last-child {
border-bottom: none;
}
.lab-content ul.lab-settings-list .lab-setting-key {
font-weight: 600;
color: #0b4e77;
letter-spacing: 0.01em;
}
.lab-content ul.lab-settings-list .lab-setting-value {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono",
"Courier New", monospace;
font-size: 0.86rem;
font-weight: 600;
color: #1f425f;
border: none;
border-radius: 0;
background: transparent;
padding: 0;
white-space: nowrap;
}
.lab-content hr {
margin: 2rem 0 1.4rem;
border-color: #d7dee6;
}
.lab-content .objective-segment {
position: relative;
margin: 1.75rem 0;
}
.lab-content .objective-segment > :first-child {
margin-top: 0;
}
.lab-content .objective-segment > :last-child {
margin-bottom: 0;
}
.lab-content .objective-segment > h2 {
scroll-margin-top: 2rem;
}
.lab-content.objective-style-divider .objective-segment {
padding-top: 1.55rem;
}
.lab-content.objective-style-divider .objective-segment::before {
content: "";
position: absolute;
top: 0;
left: 0.5rem;
right: 0.5rem;
height: 1px;
background: linear-gradient(
90deg,
rgba(0, 78, 120, 0.08),
rgba(0, 78, 120, 0.45) 12%,
rgba(0, 78, 120, 0.45) 88%,
rgba(0, 78, 120, 0.08)
);
}
.lab-content.objective-style-cards .objective-segment {
margin: 1.5rem 0;
padding: 1rem 1.2rem 1.2rem;
border: 1px solid #d8e1ea;
border-left: 6px solid #f89c27;
border-radius: 12px;
background: #f9fbfd;
box-shadow: 0 1px 0 rgba(15, 23, 42, 0.04);
}
.lab-content.objective-style-cards .objective-segment > h2 {
margin-bottom: 0.75rem;
}
.lab-content.objective-style-rail .objective-segment {
margin: 1.8rem 0;
padding: 0.4rem 0 0.5rem 1.15rem;
border-left: 4px solid #004e78;
background: linear-gradient(90deg, rgba(0, 78, 120, 0.08), rgba(0, 78, 120, 0));
}
.lab-content.objective-style-rail .objective-segment > h2 {
margin-bottom: 0.75rem;
}
.lab-content.objective-style-rail .objective-segment > h2::after {
content: "";
display: block;
width: 4.5rem;
height: 0.2rem;
margin-top: 0.45rem;
border-radius: 999px;
background-color: #f89c27;
}
.lab-content .lab-step-title {
margin-top: 1.2rem;
margin-bottom: 0.55rem;
line-height: 1.35;
color: #003f61;
letter-spacing: 0.01em;
}
.lab-content .lab-step-title + p,
.lab-content .lab-step-title + ul,
.lab-content .lab-step-title + ol,
.lab-content .lab-step-title + pre,
.lab-content .lab-step-title + blockquote,
.lab-content .lab-step-title + figure {
margin-top: 0.45rem;
}
.lab-content.step-style-underline .lab-step-title {
padding-bottom: 0.35rem;
border-bottom: 1px solid #d9e3ec;
}
.lab-content.step-style-underline .lab-step-title[data-step-mode="execute"] {
border-bottom-color: #f8cc8f;
}
.lab-content.step-style-underline .lab-step-title[data-step-mode="explore"] {
border-bottom-color: #9bc9ee;
}
.lab-content.step-style-underline .lab-step-title[data-step-mode="checkpoint"] {
border-bottom-color: #86d4a4;
}
.lab-content.step-style-pills .lab-step-title {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 0.5rem;
padding: 0.42rem 0.8rem;
border-radius: 999px;
border: 1px solid #d8e2eb;
background: #f7fbff;
}
.lab-content.step-style-pills .lab-step-title::before {
content: attr(data-step-mode);
text-transform: capitalize;
font-size: 0.68rem;
font-weight: 700;
letter-spacing: 0.08em;
padding: 0.2rem 0.5rem;
border-radius: 999px;
color: #1a4f72;
background: #e2eef8;
}
.lab-content.step-style-pills .lab-step-title:not([data-step-mode])::before {
content: none;
}
.lab-content.step-style-pills .lab-step-title[data-step-mode="execute"]::before {
color: #8a4d00;
background: #fee7c7;
}
.lab-content.step-style-pills .lab-step-title[data-step-mode="explore"]::before {
color: #0f4970;
background: #d9ebfb;
}
.lab-content.step-style-pills .lab-step-title[data-step-mode="checkpoint"]::before {
color: #0e5e35;
background: #d9f7e4;
}
.lab-content.step-style-blocks .lab-step-title {
padding: 0.7rem 0.9rem;
border: 1px solid #d8e2eb;
border-left: 5px solid #004e78;
border-radius: 10px;
background: #f9fbfd;
box-shadow: 0 1px 0 rgba(15, 23, 42, 0.04);
}
.lab-content.step-style-blocks .lab-step-title[data-step-mode="execute"] {
border-left-color: #cc7a00;
background: #fffbf5;
}
.lab-content.step-style-blocks .lab-step-title[data-step-mode="explore"] {
border-left-color: #0b72ba;
background: #f6fbff;
}
.lab-content.step-style-blocks .lab-step-title[data-step-mode="checkpoint"] {
border-left-color: #198754;
background: #f6fffa;
}
.lab-content .step-segment {
position: relative;
margin: 1rem 0 1.35rem;
}
.lab-content .step-segment > :first-child {
margin-top: 0;
}
.lab-content .step-segment > :last-child {
margin-bottom: 0;
}
.lab-content .step-segment[data-step-kind]::before {
display: inline-block;
font-size: 0.63rem;
font-weight: 700;
letter-spacing: 0.08em;
text-transform: uppercase;
margin-bottom: 0.5rem;
}
.lab-content.breakout-style-panel .step-segment {
padding: 0.75rem 0.9rem 0.9rem;
border: 1px solid #dce7f1;
border-radius: 12px;
background: #f8fbfe;
}
.lab-content.breakout-style-panel .step-segment[data-step-kind="explanation"] {
border-left: 5px solid #2b7fbf;
background: #f5fafe;
}
.lab-content.breakout-style-panel .step-segment[data-step-kind="instruction"] {
border-left: 5px solid #d48806;
background: #fffaf2;
}
.lab-content.breakout-style-panel .step-segment[data-step-kind="mixed"] {
border-left: 5px solid #0e7490;
background: linear-gradient(90deg, #f6fbff, #fffbf5);
}
.lab-content.breakout-style-panel .step-segment[data-step-kind]::before {
color: #315168;
content: attr(data-step-kind);
}
.lab-content.breakout-style-workflow .step-segment {
padding: 0.45rem 0 0.65rem 1rem;
border-left: 2px dashed #c6d5e3;
}
.lab-content.breakout-style-workflow .step-segment[data-step-kind="explanation"] {
border-left-color: #6da9d8;
}
.lab-content.breakout-style-workflow .step-segment[data-step-kind="instruction"] {
border-left-color: #de9a2e;
}
.lab-content.breakout-style-workflow .step-segment[data-step-kind="mixed"] {
border-left-color: #4a95ab;
}
.lab-content.breakout-style-workflow .step-segment[data-step-kind]::before {
width: 1.2rem;
text-indent: -9999px;
overflow: hidden;
border-radius: 999px;
margin-left: -1.4rem;
margin-right: 0.35rem;
vertical-align: middle;
background: #6da9d8;
content: "";
}
.lab-content.breakout-style-workflow .step-segment[data-step-kind="instruction"]::before {
background: #de9a2e;
}
.lab-content.breakout-style-workflow .step-segment[data-step-kind="mixed"]::before {
background: #4a95ab;
}
.lab-content .explicit-command-block {
margin: 0.8rem 0;
}
.lab-content .explicit-command .cmd-pill {
display: block;
}
.lab-content.breakout-style-command-pills .step-segment {
padding: 0.3rem 0 0.45rem;
}
.lab-content.breakout-style-command-pills .step-segment[data-step-kind="instruction"] {
border-left: 3px solid #f0b45f;
padding-left: 0.75rem;
}
.lab-content.breakout-style-command-pills .step-segment[data-step-kind="explanation"] {
border-left: 3px solid #8dc1e7;
padding-left: 0.75rem;
}
.lab-content.breakout-style-command-pills .step-segment[data-step-kind="mixed"] {
border-left: 3px solid #6db0bf;
padding-left: 0.75rem;
}
.lab-content.breakout-style-command-pills .step-segment[data-step-kind]::before {
color: #4a6477;
content: attr(data-step-kind);
}
.lab-content.breakout-style-instruction-rails .step-segment {
padding: 0.3rem 0 0.45rem 0.9rem;
border-left: none;
overflow: clip;
}
.lab-content.breakout-style-instruction-rails .step-segment::after {
content: "";
position: absolute;
left: 0;
top: 0.45rem;
height: calc(100% - 1.05rem);
width: 4px;
border-radius: 999px;
background: #6db0bf;
}
.lab-content.breakout-style-instruction-rails .step-segment[data-step-kind="instruction"]::after {
background: #f0b45f;
}
.lab-content.breakout-style-instruction-rails .step-segment[data-step-kind="explanation"]::after {
background: #8dc1e7;
}
.lab-content.breakout-style-instruction-rails .step-segment[data-step-kind]::before {
color: #4a6477;
content: attr(data-step-kind);
}
.lab-content.breakout-style-instruction-rails .lab-step-title {
font-size: 1.25rem;
line-height: 1.7rem;
}
.lab-content.breakout-style-command-pills p.explicit-instruction,
.lab-content.breakout-style-command-pills li.explicit-instruction {
border-radius: 999px;
border: 1px solid #f2d2a0;
background: #fff8ec;
padding: 0.22rem 0.68rem;
}
.lab-content.breakout-style-command-pills ol > li.explicit-instruction::marker,
.lab-content.breakout-style-command-pills ul > li.explicit-instruction::marker {
color: #a66300;
font-weight: 700;
}
.lab-content.breakout-style-command-pills .explicit-command-block {
padding: 0.25rem 0;
border: none;
background: transparent;
}
.lab-content.breakout-style-command-pills .explicit-command {
display: flex;
flex-wrap: wrap;
gap: 0.35rem;
white-space: normal;
background: transparent;
padding: 0;
}
.lab-content.breakout-style-command-pills .explicit-command .cmd-pill {
display: inline-flex;
align-items: center;
border-radius: 999px;
border: 1px solid #e5bf85;
background: #fff1d8;
padding: 0.2rem 0.58rem;
line-height: 1.25;
font-size: 0.86rem;
}
.lab-content ul.concept-pill-list {
list-style: none;
margin: 0.9rem 0 1.2rem;
margin-left: 0;
padding: 0;
display: grid;
gap: 0.6rem;
}
.lab-content ul.concept-pill-list > li {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 0.55rem;
margin: 0;
padding: 0.48rem 0.78rem;
border: 1px solid #d5e2ee;
border-radius: 999px;
background: linear-gradient(180deg, #f9fcff, #f4f9fe);
}
.lab-content .concept-pill-label {
display: inline;
color: #0f4f76;
font-size: 0.86rem;
font-weight: 700;
letter-spacing: 0.01em;
text-transform: none;
line-height: 1.25;
}
@media (max-width: 640px) {
.lab-content.objective-style-cards .objective-segment {
padding: 0.9rem 1rem 1rem;
}
.lab-content.objective-style-rail .objective-segment {
padding-left: 0.85rem;
}
.lab-content.step-style-pills .lab-step-title {
border-radius: 12px;
padding: 0.45rem 0.65rem;
}
.lab-content.step-style-blocks .lab-step-title {
padding: 0.62rem 0.75rem;
}
.lab-content.breakout-style-panel .step-segment {
padding: 0.68rem 0.72rem 0.78rem;
}
.lab-content.breakout-style-workflow .step-segment,
.lab-content.breakout-style-command-pills .step-segment,
.lab-content.breakout-style-instruction-rails .step-segment {
padding-left: 0.6rem;
}
.lab-content ul.lab-settings-list > li.lab-settings-item {
grid-template-columns: 1fr;
gap: 0.28rem;
align-items: start;
padding: 0.52rem 0.75rem;
}
.lab-content ul.lab-settings-list .lab-setting-value {
justify-self: start;
}
.lab-content ul.concept-pill-list > li {
border-radius: 16px;
}
}
+9
View File
@@ -0,0 +1,9 @@
import type { Config } from "tailwindcss";
export default {
content: ["./src/**/*.{ts,tsx,mdx}"],
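// Only src/ is scanned; injected lab HTML is styled via the .lab-content
// selectors in globals.css, so no content/ glob or safelist is needed.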
theme: {
extend: {},
},
plugins: [],
} satisfies Config;
+24
View File
@@ -0,0 +1,24 @@
{
"compilerOptions": {
"target": "es2022",
"lib": ["dom", "dom.iterable", "ES2022"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "ESNext",
"moduleResolution": "Bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [{ "name": "next" }],
"baseUrl": ".",
"paths": {
"~/*": ["./src/*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "archive"]
}