Initial local AI stack

2026-04-20 13:13:41 -05:00
commit 8a9a120d1f
4 changed files with 146 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,6 @@
+__pycache__/
+*.pyc
+*.log
+.env
+.env.*
+generated/
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Probe this machine, render the Continue config from the result, restart Ollama.
+set -euo pipefail
+
+stack_root="$(cd "$(dirname "$0")" && pwd)"
+
+# probe.sh runs first: it writes generated/machine-profile.json for the renderer.
+"$stack_root/scripts/probe.sh"
+python3 "$stack_root/scripts/render-config.py"
+
+sudo systemctl daemon-reload
+sudo systemctl restart ollama
+
+# Closing summary for the operator.
+printf '\n=== machine profile ===\n'
+cat "$stack_root/generated/machine-profile.json"
+printf '\n=== continue config ===\n~/.continue/config.yaml updated\n'
--- a/scripts/probe.sh
+++ b/scripts/probe.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Probe CPU, RAM and GPU; write generated/machine-profile.json and echo it to stdout.
+set -euo pipefail
+
+BASE_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+mkdir -p "$BASE_DIR/generated"
+
+THREADS="$(nproc)"
+RAM_GB="$(awk '/^MemTotal:/{printf "%.0f", $2/1048576; exit}' /proc/meminfo)"  # nearest GiB; free -g truncates (16 GB reads 15)
+CPU_MODEL="$(LC_ALL=C lscpu | awk -F: '/Model name/{gsub(/^[ \t]+/, "", $2); print $2; exit}')"
+CPU_MODEL=${CPU_MODEL//\\/\\\\}; CPU_MODEL=${CPU_MODEL//\"/\\\"}  # JSON-escape backslashes and double quotes
+
+GPU_VENDOR="none"
+GPU_VRAM_GB="0"
+if command -v nvidia-smi >/dev/null 2>&1; then
+  GPU_VENDOR="nvidia"
+  # Tolerate a failing nvidia-smi (|| true) instead of aborting; awk prints 0 if nothing numeric was read.
+  GPU_VRAM_GB="$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | awk 'NR==1{v=$1/1024} END{printf "%.0f", v}' || true)"
+elif command -v rocm-smi >/dev/null 2>&1; then
+  GPU_VENDOR="amd"  # VRAM is not queried for AMD, so gpu_vram_gb stays 0
+fi
+
+tee "$BASE_DIR/generated/machine-profile.json" <<JSON
+{
+  "cpu_model": "${CPU_MODEL}",
+  "threads": ${THREADS},
+  "ram_gb": ${RAM_GB},
+  "gpu_vendor": "${GPU_VENDOR}",
+  "gpu_vram_gb": ${GPU_VRAM_GB}
+}
+JSON
--- a/scripts/render-config.py
+++ b/scripts/render-config.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+import json
+from pathlib import Path
+
+base = Path(__file__).resolve().parent.parent  # repo root (this file lives in scripts/), same root probe.sh uses
+profile_path = base / "generated" / "machine-profile.json"  # produced by scripts/probe.sh
+out_path = Path.home() / ".continue" / "config.yaml"
+out_path.parent.mkdir(parents=True, exist_ok=True)
+
+profile = json.loads(profile_path.read_text(encoding="utf-8"))
+ram = profile["ram_gb"]
+gpu_vendor = profile["gpu_vendor"]
+gpu_vram = profile["gpu_vram_gb"]
+
+# Builder and Autocomplete never vary with the machine profile.
+builder_model = "qwen2.5-coder:7b"
+autocomplete_model = "qwen2.5-coder:1.5b"
+builder_context = 4096
+autocomplete_context = 2048
+critic_context = 4096
+
+# An NVIDIA card reporting at least 8 GB of VRAM qualifies the Planner for a
+# doubled context length.
+roomy_gpu = gpu_vendor == "nvidia" and gpu_vram >= 8
+
+# Under 16 GB of RAM, Planner and Critic both drop to the Builder's model and
+# the Planner stays at 4096 even if the GPU check above passed.
+low_ram = ram < 16
+
+planner_model = "qwen2.5-coder:7b" if low_ram else "qwen3.5:latest"
+critic_model = "qwen2.5-coder:7b" if low_ram else "deepseek-coder:6.7b"
+planner_context = 8192 if roomy_gpu and not low_ram else 4096
+
+config = f"""name: local-cursor-open
+version: 1.0.0
+schema: v1
+
+models:
+  - name: Builder
+    provider: ollama
+    model: {builder_model}
+    roles: [chat, edit, apply]
+    capabilities:
+      - tool_use
+    defaultCompletionOptions:
+      contextLength: {builder_context}
+      maxTokens: 1200
+      temperature: 0.1
+      keepAlive: 300
+
+  - name: Autocomplete
+    provider: ollama
+    model: {autocomplete_model}
+    roles: [autocomplete]
+    defaultCompletionOptions:
+      contextLength: {autocomplete_context}
+      maxTokens: 160
+      temperature: 0.05
+      keepAlive: 180
+
+  - name: Planner
+    provider: ollama
+    model: {planner_model}
+    roles: [chat]
+    capabilities:
+      - tool_use
+      - image_input
+    defaultCompletionOptions:
+      contextLength: {planner_context}
+      maxTokens: 1400
+      temperature: 0.2
+      keepAlive: 120
+
+  - name: Critic
+    provider: ollama
+    model: {critic_model}
+    roles: [chat]
+    defaultCompletionOptions:
+      contextLength: {critic_context}
+      maxTokens: 1200
+      temperature: 0.1
+      keepAlive: 120
+
+rules:
+  - Keep changes minimal.
+  - Reuse existing patterns.
+  - Do not refactor unrelated code.
+  - Prefer plain-English answers unless asked for code.
+"""
+# Persist the rendered YAML, then report the real destination path.
+out_path.write_text(config, encoding="utf-8")
+print(f"Wrote {out_path}")  # single braces: doubled braces printed the placeholder text verbatim