add ollama-proxy.mjs: direct Anthropic-to-Ollama translation, no litellm needed

2026-06-30 08:56:58 +10:00 · 2026-03-31 20:43:49 -04:00 · 2026-03-31 20:43:49 -04:00 · dd7b5bc7d3
commit dd7b5bc7d3
parent 38089ceaf0
2 changed files with 165 additions and 20 deletions
--- a/README.md
+++ b/README.md
@ -51,34 +51,28 @@ node cli.js

 ### With Local Models (Ollama + Qwen3-Coder)

-We patched the source to add `LOCAL_MODEL_BASE_URL` — routes only model API calls to your local proxy while letting auth/startup use Anthropic's servers normally.
+Run Claude Code's UI with a local open-source model. The included `ollama-proxy.mjs` translates between the Anthropic API format and Ollama, routing model calls locally while auth goes to Anthropic normally.

-**Requirements:** [Ollama](https://ollama.com) + [litellm](https://github.com/BerriAI/litellm) + a Claude subscription (for auth)
+**Requirements:** Node.js 18+, [Ollama](https://ollama.com), a Claude subscription (for auth only)

 ```bash
-# Step 1: Pull a model with 128K+ context (required for Claude Code's system prompt)
+# Step 1: Pull a model with 128K+ context
 ollama pull qwen3-coder:30b

-# Step 2: Create litellm config that maps Claude's model name to your local model
-cat > litellm-config.yaml << 'CONF'
-model_list:
-  - model_name: "claude-sonnet-4-20250514"
-    litellm_params:
-      model: "ollama/qwen3-coder:30b"
-      num_ctx: 65536
-litellm_settings:
-  drop_params: true
-CONF
+# Step 2: Start the proxy (included in this repo)
+node ollama-proxy.mjs

-# Step 3: Start litellm proxy (needs Python 3.10+)
-pip install 'litellm[proxy]'
-litellm --config litellm-config.yaml --port 8080
-
-# Step 4: Run Claude Code (in another terminal)
-LOCAL_MODEL_BASE_URL=http://localhost:8080 node cli.js
+# Step 3: Run Claude Code (in another terminal)
+ANTHROPIC_BASE_URL=http://localhost:9090 node cli.js
 ```

-Claude Code authenticates with Anthropic normally (you need a subscription), but all model inference runs locally on Qwen3-Coder via Ollama. Works with any model that has 128K+ context — qwen3-coder, deepseek-r1, llama4, etc.
+The proxy terminal shows color-coded routing:
+- 🟢 `[OLLAMA]` — model calls going to your local Qwen3-Coder
+- 🟡 `[ANTHROPIC]` — auth/config calls going to Anthropic
+
+**How it works:** Claude Code's bundled `cli.js` uses the Anthropic SDK, which reads `ANTHROPIC_BASE_URL`. The proxy intercepts `/v1/messages` requests (the model API) and translates them to Ollama's format, while passing everything else (auth, bootstrap, feature flags) through to `api.anthropic.com`.
+
+**To change the model**, edit the `MODEL` constant near the top of `ollama-proxy.mjs`. Works with any Ollama model that has 128K+ context — `qwen3-coder`, `qwen3.5`, `deepseek-r1:32b`, `llama4`, etc.

 ---

--- a/ollama-proxy.mjs
+++ b/ollama-proxy.mjs
@ -0,0 +1,151 @@
+// Direct Anthropic-to-Ollama proxy for Claude Code
+// Routes /v1/messages → Ollama (format translation)
+// Routes everything else → api.anthropic.com (passthrough)
+import http from 'http';
+import https from 'https';
+
+// Runtime configuration. Each value may be overridden with an environment
+// variable so the proxy can be retargeted without editing this file; when the
+// variable is unset or empty the original hard-coded default is used.
+
+// Base URL of the Ollama server. A trailing slash is stripped because request
+// paths such as `/api/chat` are appended directly to this value.
+const OLLAMA = (process.env.OLLAMA_URL || 'http://localhost:11434').replace(/\/+$/, '');
+// Ollama model tag that translated /v1/messages requests are sent to.
+const MODEL = process.env.OLLAMA_MODEL || 'qwen3-coder:30b';
+// TCP port this proxy listens on. A missing or non-numeric override falls
+// back to 9090.
+const PORT = Number.parseInt(process.env.OLLAMA_PROXY_PORT ?? '', 10) || 9090;
+
+/**
+ * Translate an Anthropic Messages API request body into an Ollama `/api/chat`
+ * request body.
+ *
+ * Only textual content is carried over: `text` blocks and the text nested
+ * inside `tool_result` blocks. Images, `tool_use` blocks and the `tools` list
+ * are NOT translated, and turns left with no text are omitted. Streaming is
+ * always disabled in the produced request.
+ *
+ * Sampling parameters supplied by the client (`temperature`, `top_p`,
+ * `top_k`, `stop_sequences`) are forwarded as Ollama `options`. When the
+ * `OLLAMA_NUM_CTX` environment variable holds a positive integer it is sent
+ * as `options.num_ctx`; otherwise the context window is left to Ollama.
+ *
+ * @param {object} body - Parsed Anthropic request (`system`, `messages`,
+ *   `max_tokens`, optional sampling parameters).
+ * @returns {{model: string, messages: Array<{role: string, content: string}>,
+ *   stream: boolean, options: object}} Ollama chat request.
+ */
+function convertAnthropicToOllama(body) {
+  const request = body ?? {};
+
+  // Flatten an Anthropic content value (string, or array of blocks) to text.
+  const flatten = (content) => {
+    if (typeof content === 'string') return content;
+    if (!Array.isArray(content)) return '';
+    return content
+      .map((block) => {
+        if (typeof block?.text === 'string') return block.text;
+        // Tool output lives in a nested `content` field rather than `text`.
+        if (block?.type === 'tool_result') return flatten(block.content);
+        return '';
+      })
+      .filter(Boolean)
+      .join('\n');
+  };
+
+  const messages = [];
+
+  // System prompt
+  const systemText = flatten(request.system);
+  if (systemText) {
+    messages.push({ role: 'system', content: systemText });
+  }
+
+  // Messages: skip turns with no translatable text (e.g. only tool_use or
+  // image blocks). A non-array `messages` value is treated as empty instead
+  // of throwing, because the caller invokes this outside any try/catch.
+  const turns = Array.isArray(request.messages) ? request.messages : [];
+  for (const msg of turns) {
+    const content = flatten(msg?.content);
+    if (content) {
+      messages.push({ role: msg.role, content });
+    }
+  }
+
+  const maxTokens = request.max_tokens;
+  const options = {
+    num_predict: Number.isInteger(maxTokens) && maxTokens > 0 ? maxTokens : 4096,
+  };
+  // Honour the client's sampling settings instead of silently dropping them.
+  for (const key of ['temperature', 'top_p', 'top_k']) {
+    if (typeof request[key] === 'number') options[key] = request[key];
+  }
+  if (Array.isArray(request.stop_sequences) && request.stop_sequences.length > 0) {
+    options.stop = request.stop_sequences;
+  }
+  // Opt-in context window override; unset means "use Ollama's own setting".
+  const numCtx = Number.parseInt(process.env.OLLAMA_NUM_CTX ?? '', 10);
+  if (Number.isInteger(numCtx) && numCtx > 0) {
+    options.num_ctx = numCtx;
+  }
+
+  return {
+    model: MODEL,
+    messages,
+    stream: false,
+    options,
+  };
+}
+
+/**
+ * Wrap a non-streaming Ollama `/api/chat` reply in an Anthropic Messages API
+ * response object containing a single text block.
+ *
+ * @param {object} ollamaRes - Parsed Ollama reply; `message.content`,
+ *   `done_reason`, `prompt_eval_count` and `eval_count` are read when present.
+ * @param {string} [requestModel] - Model name the client asked for; echoed
+ *   back so the client sees the model it requested. Falls back to
+ *   'claude-opus-4-6' when empty.
+ * @returns {object} Anthropic-shaped message (`type: 'message'`).
+ */
+function convertOllamaToAnthropic(ollamaRes, requestModel) {
+  const text = ollamaRes?.message?.content || '';
+  // Ollama reports done_reason "length" when generation was cut off by the
+  // token limit; the Anthropic equivalent is "max_tokens". Reporting
+  // "end_turn" there would hide the truncation from the client.
+  const stopReason = ollamaRes?.done_reason === 'length' ? 'max_tokens' : 'end_turn';
+  return {
+    id: 'msg_local_' + Date.now(),
+    type: 'message',
+    role: 'assistant',
+    content: [{ type: 'text', text }],
+    model: requestModel || 'claude-opus-4-6',
+    stop_reason: stopReason,
+    stop_sequence: null,
+    usage: {
+      input_tokens: ollamaRes?.prompt_eval_count || 0,
+      output_tokens: ollamaRes?.eval_count || 0,
+      // Ollama has no prompt-cache accounting, so these are always zero.
+      cache_creation_input_tokens: 0,
+      cache_read_input_tokens: 0,
+    },
+  };
+}
+
+/**
+ * Relay a request unchanged to api.anthropic.com and stream the response back.
+ *
+ * The client's headers are forwarded as-is except `host`, which is rewritten
+ * to the upstream hostname. The request body is streamed rather than
+ * buffered. If the upstream connection fails before any response headers
+ * were sent, the client receives a 502 whose body is the error message.
+ *
+ * @param {import('http').IncomingMessage} req - Incoming client request.
+ * @param {import('http').ServerResponse} res - Response to the client.
+ */
+function proxyToAnthropic(req, res) {
+  let clientGone = false;
+
+  const upstream = https.request(
+    {
+      hostname: 'api.anthropic.com',
+      port: 443,
+      path: req.url,
+      method: req.method,
+      headers: { ...req.headers, host: 'api.anthropic.com' },
+    },
+    (upstreamRes) => {
+      res.writeHead(upstreamRes.statusCode, upstreamRes.headers);
+      upstreamRes.pipe(res);
+      // pipe() does not forward source errors; drop the client connection so
+      // it does not wait forever on a half-delivered body.
+      upstreamRes.on('error', () => res.destroy());
+    },
+  );
+
+  upstream.on('error', (e) => {
+    if (clientGone) return;
+    if (res.headersSent) {
+      // Too late for a 502: calling writeHead() twice throws.
+      res.destroy();
+      return;
+    }
+    res.writeHead(502);
+    res.end(e.message);
+  });
+
+  // 'close' also fires after a normal finish; only abort the upstream
+  // exchange when the client went away before the response was complete.
+  res.on('close', () => {
+    if (!res.writableFinished) {
+      clientGone = true;
+      upstream.destroy();
+    }
+  });
+
+  // Ends the upstream request automatically when the client body ends.
+  req.pipe(upstream);
+}
+
+// Serialise one Server-Sent Event; the event name mirrors the payload's `type`.
+function formatSseEvent(payload) {
+  return `event: ${payload.type}\ndata: ${JSON.stringify(payload)}\n\n`;
+}
+
+// Replay a complete Anthropic message as the event sequence a streaming
+// client expects. Only `text` content blocks are emitted.
+function writeMessageAsSse(res, message) {
+  const { content, stop_reason, stop_sequence, usage, ...head } = message;
+  res.writeHead(200, {
+    'Content-Type': 'text/event-stream',
+    'Cache-Control': 'no-cache',
+  });
+  res.write(formatSseEvent({
+    type: 'message_start',
+    message: {
+      ...head,
+      content: [],
+      stop_reason: null,
+      stop_sequence: null,
+      usage: { ...usage, output_tokens: 0 },
+    },
+  }));
+  const textBlocks = (Array.isArray(content) ? content : []).filter(b => b?.type === 'text');
+  textBlocks.forEach((block, index) => {
+    res.write(formatSseEvent({
+      type: 'content_block_start',
+      index,
+      content_block: { type: 'text', text: '' },
+    }));
+    res.write(formatSseEvent({
+      type: 'content_block_delta',
+      index,
+      delta: { type: 'text_delta', text: block.text ?? '' },
+    }));
+    res.write(formatSseEvent({ type: 'content_block_stop', index }));
+  });
+  res.write(formatSseEvent({
+    type: 'message_delta',
+    delta: { stop_reason: stop_reason ?? null, stop_sequence: stop_sequence ?? null },
+    usage: { output_tokens: usage?.output_tokens ?? 0 },
+  }));
+  res.end(formatSseEvent({ type: 'message_stop' }));
+}
+
+/**
+ * Handle a /v1/messages request by running it on the local Ollama model.
+ *
+ * The request body is buffered, parsed as JSON, translated with
+ * convertAnthropicToOllama and POSTed to `${OLLAMA}/api/chat` as a
+ * non-streaming call. The reply is translated with convertOllamaToAnthropic
+ * and returned either as a JSON body or, when the client sent `stream: true`,
+ * as a Server-Sent-Events replay of the finished message.
+ *
+ * Responses: 400 for a body that is not a JSON object, 502 when Ollama is
+ * unreachable or reports an error, 500 when Ollama's reply cannot be parsed.
+ *
+ * @param {import('http').IncomingMessage} req - Incoming client request.
+ * @param {import('http').ServerResponse} res - Response to the client.
+ */
+function handleMessages(req, res) {
+  const chunks = [];
+  req.on('data', c => chunks.push(c));
+  req.on('end', () => {
+    let parsed;
+    try {
+      parsed = JSON.parse(Buffer.concat(chunks).toString());
+    } catch {
+      parsed = undefined;
+    }
+    // JSON.parse also accepts `null`, numbers, strings and arrays; reading
+    // `.model` from those would throw outside any try/catch.
+    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
+      res.writeHead(400);
+      res.end('Invalid JSON');
+      return;
+    }
+
+    const requestModel = parsed.model;
+    const wantsStream = parsed.stream === true;
+    console.log(`\x1b[32m[OLLAMA]\x1b[0m ${req.method} ${req.url} model=${requestModel} stream=${parsed.stream}`);
+
+    // Ollama is always called non-streaming (simpler translation); a
+    // streaming client gets the finished message replayed as SSE below.
+    const payload = JSON.stringify(convertAnthropicToOllama(parsed));
+    let clientGone = false;
+
+    // Send an error only while that is still possible; writeHead() after
+    // headers were sent throws and would take the whole process down.
+    const fail = (status, message) => {
+      if (clientGone) return;
+      if (res.headersSent) {
+        res.destroy();
+        return;
+      }
+      res.writeHead(status);
+      res.end(message);
+    };
+
+    const ollamaReq = http.request(
+      `${OLLAMA}/api/chat`,
+      { method: 'POST', headers: { 'Content-Type': 'application/json' } },
+      ollamaRes => {
+        const data = [];
+        ollamaRes.on('data', c => data.push(c));
+        ollamaRes.on('end', () => {
+          if (clientGone) return;
+
+          let ollamaResult;
+          let parseError;
+          try {
+            ollamaResult = JSON.parse(Buffer.concat(data).toString());
+          } catch (e) {
+            parseError = e;
+          }
+
+          // Surface Ollama failures (e.g. unknown model) instead of turning
+          // them into a 200 with empty text.
+          if (ollamaRes.statusCode >= 400 || ollamaResult?.error) {
+            const detail = String(ollamaResult?.error ?? `HTTP ${ollamaRes.statusCode}`);
+            console.error('[OLLAMA] Upstream error:', detail);
+            fail(502, 'Ollama error: ' + detail);
+            return;
+          }
+          if (parseError || ollamaResult === null || typeof ollamaResult !== 'object') {
+            console.error('[OLLAMA] Parse error:', parseError?.message ?? 'unexpected payload');
+            fail(500, 'Ollama response parse error');
+            return;
+          }
+
+          try {
+            const anthropicResponse = convertOllamaToAnthropic(ollamaResult, requestModel);
+            console.log(`\x1b[32m[OLLAMA]\x1b[0m ← ${ollamaResult.eval_count || '?'} tokens, ${((ollamaResult.total_duration || 0) / 1e9).toFixed(1)}s`);
+            if (wantsStream) {
+              writeMessageAsSse(res, anthropicResponse);
+              return;
+            }
+            const respBody = JSON.stringify(anthropicResponse);
+            res.writeHead(200, {
+              'Content-Type': 'application/json',
+              'Content-Length': Buffer.byteLength(respBody),
+            });
+            res.end(respBody);
+          } catch (e) {
+            console.error('[OLLAMA] Parse error:', e.message);
+            fail(500, 'Ollama response parse error');
+          }
+        });
+      },
+    );
+    ollamaReq.on('error', e => {
+      if (clientGone) return;
+      console.error('[OLLAMA] Connection error:', e.message);
+      fail(502, 'Ollama connection error: ' + e.message);
+    });
+
+    // 'close' also fires after a normal finish; only cancel generation when
+    // the client disconnected before the response was complete.
+    res.on('close', () => {
+      if (!res.writableFinished) {
+        clientGone = true;
+        ollamaReq.destroy();
+      }
+    });
+
+    ollamaReq.end(payload);
+  });
+}
+
+// Entry point: route each request either to the local model or upstream.
+const server = http.createServer((req, res) => {
+  // Match the exact path with any query string removed. A prefix test would
+  // also capture sub-resources such as /v1/messages/count_tokens, which
+  // handleMessages would answer with a full chat completion in the wrong
+  // response shape.
+  // NOTE(review): those sub-resources are now relayed to api.anthropic.com
+  // like every other path — confirm that is acceptable for local-only setups.
+  const pathname = (req.url ?? '').split('?', 1)[0];
+  if (pathname === '/v1/messages') {
+    handleMessages(req, res);
+  } else {
+    console.log(`\x1b[33m[ANTHROPIC]\x1b[0m ${req.method} ${req.url}`);
+    proxyToAnthropic(req, res);
+  }
+});
+
+// Start listening and print a short routing summary.
+server.listen(PORT, () => {
+  console.log(`\n🔀 Ollama proxy on :${PORT}`);
+  console.log(`   /v1/messages → Ollama ${MODEL} (Anthropic format translation)`);
+  console.log(`   everything else → api.anthropic.com\n`);
+});