Initial local AI stack

2026-04-20 13:13:41 -05:00
commit 8a9a120d1f
4 changed files with 146 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,6 @@
 # Python bytecode caches.
 __pycache__/
 *.pyc
 # Log files.
 *.log
 # Environment files (presumably machine-local settings or secrets; confirm).
 .env
 .env.*
 # Output directory created by scripts/probe.sh (holds machine-profile.json).
 generated/
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -0,0 +1,17 @@
 #!/usr/bin/env bash
 # Bootstrap the local AI stack: probe the machine, render the Continue
 # config, reload systemd and restart the ollama service, then print a summary.
 set -euo pipefail
 # Absolute path of the directory that contains this script.
 repo_root="$(cd "$(dirname "$0")" && pwd)"
 scripts_dir="$repo_root/scripts"
 # Print each argument on its own line (an empty argument yields a blank line).
 say() {
  printf '%s\n' "$@"
 }
 # Step 1: write generated/machine-profile.json.
 "$scripts_dir/probe.sh"
 # Step 2: render the Continue configuration from that profile.
 python3 "$scripts_dir/render-config.py"
 # Step 3: reload systemd units and restart ollama.
 sudo systemctl daemon-reload
 sudo systemctl restart ollama
 # Step 4: summary for the operator.
 say "" "=== machine profile ==="
 cat "$repo_root/generated/machine-profile.json"
 say "" "=== continue config ===" "~/.continue/config.yaml updated"
--- a/scripts/probe.sh
+++ b/scripts/probe.sh
@@ -0,0 +1,31 @@
 #!/usr/bin/env bash
 # Probe this machine's CPU, RAM and GPU and write the result to
 # generated/machine-profile.json under the repository root. The JSON is also
 # echoed to stdout.
 set -euo pipefail
 BASE_DIR="$(cd "$(dirname "$0")/.." && pwd)"
 mkdir -p "$BASE_DIR/generated"
 THREADS="$(nproc)"
 # `free -g` truncates to whole GiB, so a nominal 16 GB machine reports 15.
 # Round from MiB instead. LC_ALL=C keeps the parsed "Mem:" label untranslated.
 RAM_GB="$(LC_ALL=C free -m | awk '/^Mem:/{printf "%.0f", $2/1024}')"
 # LC_ALL=C keeps the "Model name" label untranslated as well.
 CPU_MODEL="$(LC_ALL=C lscpu | awk -F: '/Model name/{gsub(/^[ \t]+/, "", $2); print $2; exit}')"
 # Escape backslashes and double quotes so the value stays a valid JSON string.
 CPU_MODEL="${CPU_MODEL//\\/\\\\}"
 CPU_MODEL="${CPU_MODEL//\"/\\\"}"
 GPU_VENDOR="none"
 GPU_VRAM_GB="0"
 if command -v nvidia-smi >/dev/null 2>&1; then
  GPU_VENDOR="nvidia"
  # nvidia-smi can be installed yet exit non-zero (e.g. driver not loaded).
  # `|| true` stops pipefail + set -e from aborting the whole probe.
  GPU_VRAM_GB="$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n1 | awk '{printf "%.0f", $1/1024}' || true)"
 elif command -v rocm-smi >/dev/null 2>&1; then
  # VRAM is not queried for AMD; gpu_vram_gb stays at 0.
  GPU_VENDOR="amd"
 fi
 # These two are emitted as bare JSON numbers; an empty or non-numeric value
 # would make the profile unparseable, so fall back to 0.
 [[ "$RAM_GB" =~ ^[0-9]+$ ]] || RAM_GB="0"
 [[ "$GPU_VRAM_GB" =~ ^[0-9]+$ ]] || GPU_VRAM_GB="0"
 cat > "$BASE_DIR/generated/machine-profile.json" <<JSON
 {
  "cpu_model": "${CPU_MODEL}",
  "threads": ${THREADS},
  "ram_gb": ${RAM_GB},
  "gpu_vendor": "${GPU_VENDOR}",
  "gpu_vram_gb": ${GPU_VRAM_GB}
 }
 JSON
 cat "$BASE_DIR/generated/machine-profile.json"
--- a/scripts/render-config.py
+++ b/scripts/render-config.py
@@ -0,0 +1,92 @@
 #!/usr/bin/env python3
 """Render ~/.continue/config.yaml from generated/machine-profile.json.

 The profile is read from the ``generated`` directory next to this script's
 parent directory, and model names and context lengths are chosen from the
 RAM and GPU values it contains.
 """
 import json
 from pathlib import Path

 # Repository root, derived from this script's own location so it agrees with
 # the BASE_DIR that bootstrap.sh and probe.sh compute, wherever the checkout
 # lives.
 base = Path(__file__).resolve().parent.parent
 profile_path = base / "generated" / "machine-profile.json"
 out_path = Path.home() / ".continue" / "config.yaml"


 def render_config(profile):
     """Return the Continue config.yaml text for a machine profile.

     Args:
         profile: Mapping with the keys ``ram_gb``, ``gpu_vendor`` and
             ``gpu_vram_gb``.

     Returns:
         The complete YAML document as a string.

     Raises:
         KeyError: If one of the required keys is missing from *profile*.
     """
     ram = profile["ram_gb"]
     gpu_vendor = profile["gpu_vendor"]
     gpu_vram = profile["gpu_vram_gb"]
     builder_model = "qwen2.5-coder:7b"
     autocomplete_model = "qwen2.5-coder:1.5b"
     planner_model = "qwen3.5:latest"
     critic_model = "deepseek-coder:6.7b"
     builder_context = 4096
     autocomplete_context = 2048
     planner_context = 4096
     critic_context = 4096
     # An NVIDIA card with at least 8 GB of VRAM gets a larger planner context.
     if gpu_vendor == "nvidia" and gpu_vram >= 8:
         planner_context = 8192
     # Below 16 GB of RAM every chat role shares the builder model, and the
     # planner context is reset even when the GPU rule above raised it.
     if ram < 16:
         planner_model = "qwen2.5-coder:7b"
         critic_model = "qwen2.5-coder:7b"
         planner_context = 4096
         critic_context = 4096
     return f"""name: local-cursor-open
 version: 1.0.0
 schema: v1
 models:
  - name: Builder
    provider: ollama
    model: {builder_model}
    roles: [chat, edit, apply]
    capabilities:
      - tool_use
    defaultCompletionOptions:
      contextLength: {builder_context}
      maxTokens: 1200
      temperature: 0.1
      keepAlive: 300
  - name: Autocomplete
    provider: ollama
    model: {autocomplete_model}
    roles: [autocomplete]
    defaultCompletionOptions:
      contextLength: {autocomplete_context}
      maxTokens: 160
      temperature: 0.05
      keepAlive: 180
  - name: Planner
    provider: ollama
    model: {planner_model}
    roles: [chat]
    capabilities:
      - tool_use
      - image_input
    defaultCompletionOptions:
      contextLength: {planner_context}
      maxTokens: 1400
      temperature: 0.2
      keepAlive: 120
  - name: Critic
    provider: ollama
    model: {critic_model}
    roles: [chat]
    defaultCompletionOptions:
      contextLength: {critic_context}
      maxTokens: 1200
      temperature: 0.1
      keepAlive: 120
 rules:
  - Keep changes minimal.
  - Reuse existing patterns.
  - Do not refactor unrelated code.
  - Prefer plain-English answers unless asked for code.
 """


 def main():
     """Read the machine profile, write the config file and report its path."""
     profile = json.loads(profile_path.read_text(encoding="utf-8"))
     out_path.parent.mkdir(parents=True, exist_ok=True)
     out_path.write_text(render_config(profile), encoding="utf-8")
     # Single braces so the actual path is interpolated into the message.
     print(f"Wrote {out_path}")


 if __name__ == "__main__":
     main()