diff --git a/README.md b/README.md index d48d101..e7591cd 100644 --- a/README.md +++ b/README.md @@ -51,34 +51,28 @@ node cli.js ### With Local Models (Ollama + Qwen3-Coder) -We patched the source to add `LOCAL_MODEL_BASE_URL` — routes only model API calls to your local proxy while letting auth/startup use Anthropic's servers normally. +Run Claude Code's UI with a local open-source model. The included `ollama-proxy.mjs` translates between the Anthropic API format and Ollama, routing model calls locally while auth goes to Anthropic normally. -**Requirements:** [Ollama](https://ollama.com) + [litellm](https://github.com/BerriAI/litellm) + a Claude subscription (for auth) +**Requirements:** Node.js 18+, [Ollama](https://ollama.com), a Claude subscription (for auth only) ```bash -# Step 1: Pull a model with 128K+ context (required for Claude Code's system prompt) +# Step 1: Pull a model with 128K+ context ollama pull qwen3-coder:30b -# Step 2: Create litellm config that maps Claude's model name to your local model -cat > litellm-config.yaml << 'CONF' -model_list: - - model_name: "claude-sonnet-4-20250514" - litellm_params: - model: "ollama/qwen3-coder:30b" - num_ctx: 65536 -litellm_settings: - drop_params: true -CONF +# Step 2: Start the proxy (included in this repo) +node ollama-proxy.mjs -# Step 3: Start litellm proxy (needs Python 3.10+) -pip install 'litellm[proxy]' -litellm --config litellm-config.yaml --port 8080 - -# Step 4: Run Claude Code (in another terminal) -LOCAL_MODEL_BASE_URL=http://localhost:8080 node cli.js +# Step 3: Run Claude Code (in another terminal) +ANTHROPIC_BASE_URL=http://localhost:9090 node cli.js ``` -Claude Code authenticates with Anthropic normally (you need a subscription), but all model inference runs locally on Qwen3-Coder via Ollama. Works with any model that has 128K+ context — qwen3-coder, deepseek-r1, llama4, etc. 
+The proxy terminal shows color-coded routing: +- 🟢 `[OLLAMA]` — model calls going to your local Qwen3-Coder +- 🟡 `[ANTHROPIC]` — auth/config calls going to Anthropic + +**How it works:** Claude Code's bundled `cli.js` uses the Anthropic SDK, which reads `ANTHROPIC_BASE_URL`. The proxy intercepts `/v1/messages` requests (the model API) and translates them to Ollama's format, while passing everything else (auth, bootstrap, feature flags) through to `api.anthropic.com`. + +**To change the model**, edit the `MODEL` constant (line 8) in `ollama-proxy.mjs`. Works with any Ollama model that has 128K+ context — `qwen3-coder`, `qwen3.5`, `deepseek-r1:32b`, `llama4`, etc. --- diff --git a/ollama-proxy.mjs b/ollama-proxy.mjs new file mode 100644 index 0000000..d30f103 --- /dev/null +++ b/ollama-proxy.mjs @@ -0,0 +1,151 @@ +// Direct Anthropic-to-Ollama proxy for Claude Code +// Routes /v1/messages → Ollama (format translation) +// Routes everything else → api.anthropic.com (passthrough) +import http from 'http'; +import https from 'https'; + +const OLLAMA = 'http://localhost:11434'; +const MODEL = 'qwen3-coder:30b'; +const PORT = 9090; + +function convertAnthropicToOllama(body) { + const messages = []; + + // System prompt + if (body.system) { + const sysText = typeof body.system === 'string' + ? 
body.system + : body.system.map(b => b.text || '').join('\n'); + messages.push({ role: 'system', content: sysText }); + } + + // Messages + for (const msg of (body.messages || [])) { + let content = ''; + if (typeof msg.content === 'string') { + content = msg.content; + } else if (Array.isArray(msg.content)) { + content = msg.content.map(b => b.text || '').filter(Boolean).join('\n'); + } + if (content) { + messages.push({ role: msg.role, content }); + } + } + + return { + model: MODEL, + messages, + stream: false, + options: { num_predict: body.max_tokens || 4096 }, + }; +} + +function convertOllamaToAnthropic(ollamaRes, requestModel) { + const text = ollamaRes.message?.content || ''; + return { + id: 'msg_local_' + Date.now(), + type: 'message', + role: 'assistant', + content: [{ type: 'text', text }], + model: requestModel || 'claude-opus-4-6', + stop_reason: 'end_turn', + stop_sequence: null, + usage: { + input_tokens: ollamaRes.prompt_eval_count || 0, + output_tokens: ollamaRes.eval_count || 0, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + }; +} + +function proxyToAnthropic(req, res) { + let body = []; + req.on('data', c => body.push(c)); + req.on('end', () => { + const opts = { + hostname: 'api.anthropic.com', + port: 443, + path: req.url, + method: req.method, + headers: { ...req.headers, host: 'api.anthropic.com' }, + }; + const pr = https.request(opts, pr2 => { + res.writeHead(pr2.statusCode, pr2.headers); + pr2.pipe(res); + }); + pr.on('error', e => { res.writeHead(502); res.end(e.message); }); + if (body.length) pr.write(Buffer.concat(body)); + pr.end(); + }); +} + +function handleMessages(req, res) { + let body = []; + req.on('data', c => body.push(c)); + req.on('end', () => { + let parsed; + try { + parsed = JSON.parse(Buffer.concat(body).toString()); + } catch { + res.writeHead(400); + res.end('Invalid JSON'); + return; + } + + const requestModel = parsed.model; + console.log(`\x1b[32m[OLLAMA]\x1b[0m ${req.method} ${req.url} 
model=${requestModel} stream=${parsed.stream}`); + + // Force non-streaming (simpler translation) + const ollamaBody = convertAnthropicToOllama(parsed); + const payload = JSON.stringify(ollamaBody); + + const ollamaReq = http.request( + `${OLLAMA}/api/chat`, + { method: 'POST', headers: { 'Content-Type': 'application/json' } }, + ollamaRes => { + let data = []; + ollamaRes.on('data', c => data.push(c)); + ollamaRes.on('end', () => { + try { + const ollamaResult = JSON.parse(Buffer.concat(data).toString()); + const anthropicResponse = convertOllamaToAnthropic(ollamaResult, requestModel); + const respBody = JSON.stringify(anthropicResponse); + console.log(`\x1b[32m[OLLAMA]\x1b[0m ← ${ollamaResult.eval_count || '?'} tokens, ${((ollamaResult.total_duration || 0) / 1e9).toFixed(1)}s`); + res.writeHead(200, { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(respBody), + }); + res.end(respBody); + } catch (e) { + console.error('[OLLAMA] Parse error:', e.message); + res.writeHead(500); + res.end('Ollama response parse error'); + } + }); + }, + ); + ollamaReq.on('error', e => { + console.error('[OLLAMA] Connection error:', e.message); + res.writeHead(502); + res.end('Ollama connection error: ' + e.message); + }); + ollamaReq.write(payload); + ollamaReq.end(); + }); +} + +const server = http.createServer((req, res) => { + if (req.url?.startsWith('/v1/messages')) { + handleMessages(req, res); + } else { + console.log(`\x1b[33m[ANTHROPIC]\x1b[0m ${req.method} ${req.url}`); + proxyToAnthropic(req, res); + } +}); + +server.listen(PORT, () => { + console.log(`\nšŸ”€ Ollama proxy on :${PORT}`); + console.log(` /v1/messages → Ollama ${MODEL} (Anthropic format translation)`); + console.log(` everything else → api.anthropic.com\n`); +});