Initial local AI stack
This commit is contained in:
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.log
|
||||
.env
|
||||
.env.*
|
||||
generated/
|
||||
17
bootstrap.sh
Executable file
17
bootstrap.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Bootstrap the local AI stack: probe the machine, render the Continue
# config from the probe result, restart ollama, then print a summary.
set -euo pipefail

# Absolute path of the directory that contains this script.
BASE_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly BASE_DIR

# Print the generated machine profile followed by the config notice.
print_summary() {
  printf '\n'
  printf '%s\n' "=== machine profile ==="
  cat "$BASE_DIR/generated/machine-profile.json"
  printf '\n'
  printf '%s\n' "=== continue config ==="
  printf '%s\n' "~/.continue/config.yaml updated"
}

# Step 1: write generated/machine-profile.json.
"$BASE_DIR/scripts/probe.sh"

# Step 2: turn the profile into the Continue configuration.
python3 "$BASE_DIR/scripts/render-config.py"

# Step 3: reload systemd units and restart the ollama service.
sudo systemctl daemon-reload
sudo systemctl restart ollama

print_summary
|
||||
31
scripts/probe.sh
Executable file
31
scripts/probe.sh
Executable file
@@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
# Probe the local machine (CPU model, thread count, RAM, GPU vendor/VRAM)
# and write the result to generated/machine-profile.json, then print it.
set -euo pipefail

BASE_DIR="$(cd "$(dirname "$0")/.." && pwd)"
mkdir -p "$BASE_DIR/generated"

# Escape backslashes and double quotes so the value can be embedded in a
# JSON string literal without producing invalid JSON.
json_escape() {
  printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

THREADS="$(nproc)"

# LC_ALL=C keeps the "Mem:" and "Model name" labels untranslated so the awk
# patterns below match regardless of the user's locale.
# RAM is rounded from MiB to the nearest GiB: `free -g` truncates, so a
# 16 GB machine would report 15 and be treated as a low-memory host.
RAM_GB="$(LC_ALL=C free -m | awk '/^Mem:/{printf "%.0f", $2/1024}')"
RAM_GB="${RAM_GB:-0}"

# Take the first "Model name" line without exiting awk early, so lscpu never
# gets a SIGPIPE that `pipefail` would turn into a script failure.
CPU_MODEL="$(LC_ALL=C lscpu | awk -F: '/Model name/ && !seen {gsub(/^[ \t]+/, "", $2); print $2; seen=1}')"

GPU_VENDOR="none"
GPU_VRAM_GB="0"

if command -v nvidia-smi >/dev/null 2>&1; then
  GPU_VENDOR="nvidia"
  # nvidia-smi may be installed while the driver is unavailable; a failed or
  # non-numeric query leaves VRAM at 0 instead of aborting the whole probe.
  VRAM_MIB="$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | awk 'NR==1{print $1}' || true)"
  if [[ "$VRAM_MIB" =~ ^[0-9]+$ ]]; then
    GPU_VRAM_GB="$(awk -v mib="$VRAM_MIB" 'BEGIN{printf "%.0f", mib/1024}')"
  fi
elif command -v rocm-smi >/dev/null 2>&1; then
  # VRAM is not queried for AMD; it stays at 0.
  GPU_VENDOR="amd"
fi

cat > "$BASE_DIR/generated/machine-profile.json" <<JSON
{
  "cpu_model": "$(json_escape "$CPU_MODEL")",
  "threads": ${THREADS},
  "ram_gb": ${RAM_GB},
  "gpu_vendor": "${GPU_VENDOR}",
  "gpu_vram_gb": ${GPU_VRAM_GB}
}
JSON

cat "$BASE_DIR/generated/machine-profile.json"
|
||||
92
scripts/render-config.py
Executable file
92
scripts/render-config.py
Executable file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3
"""Render the Continue configuration from the probed machine profile.

Reads ``generated/machine-profile.json``, chooses models and context sizes
based on the reported RAM and GPU, and writes ``~/.continue/config.yaml``.
"""
import json
from pathlib import Path

# Repository root derived from this file's location (scripts/..), so the
# script works wherever the repository is checked out instead of assuming
# ~/local-ai-stack. The shell scripts compute their base directory the
# same way, from their own location.
base = Path(__file__).resolve().parent.parent
profile_path = base / "generated" / "machine-profile.json"
out_path = Path.home() / ".continue" / "config.yaml"

# Placeholders are filled by str.format; the YAML itself contains no braces.
_CONFIG_TEMPLATE = """name: local-cursor-open
version: 1.0.0
schema: v1

models:
  - name: Builder
    provider: ollama
    model: {builder_model}
    roles: [chat, edit, apply]
    capabilities:
      - tool_use
    defaultCompletionOptions:
      contextLength: {builder_context}
      maxTokens: 1200
      temperature: 0.1
      keepAlive: 300

  - name: Autocomplete
    provider: ollama
    model: {autocomplete_model}
    roles: [autocomplete]
    defaultCompletionOptions:
      contextLength: {autocomplete_context}
      maxTokens: 160
      temperature: 0.05
      keepAlive: 180

  - name: Planner
    provider: ollama
    model: {planner_model}
    roles: [chat]
    capabilities:
      - tool_use
      - image_input
    defaultCompletionOptions:
      contextLength: {planner_context}
      maxTokens: 1400
      temperature: 0.2
      keepAlive: 120

  - name: Critic
    provider: ollama
    model: {critic_model}
    roles: [chat]
    defaultCompletionOptions:
      contextLength: {critic_context}
      maxTokens: 1200
      temperature: 0.1
      keepAlive: 120

rules:
  - Keep changes minimal.
  - Reuse existing patterns.
  - Do not refactor unrelated code.
  - Prefer plain-English answers unless asked for code.
"""


def _select_settings(profile):
    """Return the model names and context lengths for *profile*.

    *profile* must provide ``ram_gb``, ``gpu_vendor`` and ``gpu_vram_gb``.
    """
    settings = {
        "builder_model": "qwen2.5-coder:7b",
        "autocomplete_model": "qwen2.5-coder:1.5b",
        "planner_model": "qwen3.5:latest",
        "critic_model": "deepseek-coder:6.7b",
        "builder_context": 4096,
        "autocomplete_context": 2048,
        "planner_context": 4096,
        "critic_context": 4096,
    }

    # An NVIDIA GPU with at least 8 GB of VRAM gets a larger planner context.
    if profile["gpu_vendor"] == "nvidia" and profile["gpu_vram_gb"] >= 8:
        settings["planner_context"] = 8192

    # Below 16 GB of RAM, planner and critic reuse the builder model and the
    # planner context is reset; this overrides the GPU bump above.
    if profile["ram_gb"] < 16:
        settings["planner_model"] = "qwen2.5-coder:7b"
        settings["critic_model"] = "qwen2.5-coder:7b"
        settings["planner_context"] = 4096
        settings["critic_context"] = 4096

    return settings


def _render_config(settings):
    """Return the config.yaml text for the chosen *settings*."""
    return _CONFIG_TEMPLATE.format(**settings)


def main(profile_file=None, config_file=None):
    """Read the machine profile and write the Continue config.

    Args:
        profile_file: Path of the profile JSON; defaults to ``profile_path``.
        config_file: Path of the YAML to write; defaults to ``out_path``.

    Raises:
        SystemExit: If the profile file does not exist.
    """
    source = profile_path if profile_file is None else Path(profile_file)
    target = out_path if config_file is None else Path(config_file)

    if not source.is_file():
        raise SystemExit(
            f"Machine profile not found at {source}; run scripts/probe.sh first."
        )

    profile = json.loads(source.read_text(encoding="utf-8"))
    config = _render_config(_select_settings(profile))

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(config, encoding="utf-8")
    # Single braces: the original escaped them and printed "{out_path}" literally.
    print(f"Wrote {target}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user