Initial local AI stack

2026-04-20 13:13:41 -05:00
commit 8a9a120d1f
4 changed files with 146 additions and 0 deletions
--- a/scripts/render-config.py
+++ b/scripts/render-config.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+import json
+from pathlib import Path
+
+base = Path.home() / "local-ai-stack"
+profile_path = base / "generated" / "machine-profile.json"
+out_path = Path.home() / ".continue" / "config.yaml"
+out_path.parent.mkdir(parents=True, exist_ok=True)
+
+profile = json.loads(profile_path.read_text())
+ram = profile["ram_gb"]
+gpu_vendor = profile["gpu_vendor"]
+gpu_vram = profile["gpu_vram_gb"]
+
+builder_model = "qwen2.5-coder:7b"
+autocomplete_model = "qwen2.5-coder:1.5b"
+planner_model = "qwen3.5:latest"
+critic_model = "deepseek-coder:6.7b"
+
+builder_context = 4096
+autocomplete_context = 2048
+planner_context = 4096
+critic_context = 4096
+
+if gpu_vendor == "nvidia" and gpu_vram >= 8:
+    planner_context = 8192
+
+if ram < 16:
+    planner_model = "qwen2.5-coder:7b"
+    critic_model = "qwen2.5-coder:7b"
+    planner_context = 4096
+    critic_context = 4096
+# YAML text later written to out_path; each {name} field is filled from the variables set above.
+config = f"""name: local-cursor-open
+version: 1.0.0
+schema: v1
+
+models:
+  - name: Builder
+    provider: ollama
+    model: {builder_model}
+    roles: [chat, edit, apply]
+    capabilities:
+      - tool_use
+    defaultCompletionOptions:
+      contextLength: {builder_context}
+      maxTokens: 1200
+      temperature: 0.1
+      keepAlive: 300
+
+  - name: Autocomplete
+    provider: ollama
+    model: {autocomplete_model}
+    roles: [autocomplete]
+    defaultCompletionOptions:
+      contextLength: {autocomplete_context}
+      maxTokens: 160
+      temperature: 0.05
+      keepAlive: 180
+
+  - name: Planner
+    provider: ollama
+    model: {planner_model}
+    roles: [chat]
+    capabilities:
+      - tool_use
+      - image_input
+    defaultCompletionOptions:
+      contextLength: {planner_context}
+      maxTokens: 1400
+      temperature: 0.2
+      keepAlive: 120
+
+  - name: Critic
+    provider: ollama
+    model: {critic_model}
+    roles: [chat]
+    defaultCompletionOptions:
+      contextLength: {critic_context}
+      maxTokens: 1200
+      temperature: 0.1
+      keepAlive: 120
+
+rules:
+  - Keep changes minimal.
+  - Reuse existing patterns.
+  - Do not refactor unrelated code.
+  - Prefer plain-English answers unless asked for code.
+"""
+
+out_path.write_text(config)
+print(f"Wrote {{out_path}}")