mirror of
http://10.0.2.1:3031/sauer/claude-code.git
synced 2026-06-30 10:06:57 +10:00
add ollama-proxy.mjs: direct Anthropic-to-Ollama translation, no litellm needed
This commit is contained in:
parent
38089ceaf0
commit
dd7b5bc7d3
34
README.md
34
README.md
@ -51,34 +51,28 @@ node cli.js
|
|||||||
|
|
||||||
### With Local Models (Ollama + Qwen3-Coder)
|
### With Local Models (Ollama + Qwen3-Coder)
|
||||||
|
|
||||||
We patched the source to add `LOCAL_MODEL_BASE_URL` — routes only model API calls to your local proxy while letting auth/startup use Anthropic's servers normally.
|
Run Claude Code's UI with a local open-source model. The included `ollama-proxy.mjs` translates between the Anthropic API format and Ollama, routing model calls locally while auth goes to Anthropic normally.
|
||||||
|
|
||||||
**Requirements:** [Ollama](https://ollama.com) + [litellm](https://github.com/BerriAI/litellm) + a Claude subscription (for auth)
|
**Requirements:** Node.js 18+, [Ollama](https://ollama.com), a Claude subscription (for auth only)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Step 1: Pull a model with 128K+ context (required for Claude Code's system prompt)
|
# Step 1: Pull a model with 128K+ context
|
||||||
ollama pull qwen3-coder:30b
|
ollama pull qwen3-coder:30b
|
||||||
|
|
||||||
# Step 2: Create litellm config that maps Claude's model name to your local model
|
# Step 2: Start the proxy (included in this repo)
|
||||||
cat > litellm-config.yaml << 'CONF'
|
node ollama-proxy.mjs
|
||||||
model_list:
|
|
||||||
- model_name: "claude-sonnet-4-20250514"
|
|
||||||
litellm_params:
|
|
||||||
model: "ollama/qwen3-coder:30b"
|
|
||||||
num_ctx: 65536
|
|
||||||
litellm_settings:
|
|
||||||
drop_params: true
|
|
||||||
CONF
|
|
||||||
|
|
||||||
# Step 3: Start litellm proxy (needs Python 3.10+)
|
# Step 3: Run Claude Code (in another terminal)
|
||||||
pip install 'litellm[proxy]'
|
ANTHROPIC_BASE_URL=http://localhost:9090 node cli.js
|
||||||
litellm --config litellm-config.yaml --port 8080
|
|
||||||
|
|
||||||
# Step 4: Run Claude Code (in another terminal)
|
|
||||||
LOCAL_MODEL_BASE_URL=http://localhost:8080 node cli.js
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Claude Code authenticates with Anthropic normally (you need a subscription), but all model inference runs locally on Qwen3-Coder via Ollama. Works with any model that has 128K+ context — qwen3-coder, deepseek-r1, llama4, etc.
|
The proxy terminal shows color-coded routing:
|
||||||
|
- 🟢 `[OLLAMA]` — model calls going to your local Qwen3-Coder
|
||||||
|
- 🟡 `[ANTHROPIC]` — auth/config calls going to Anthropic
|
||||||
|
|
||||||
|
**How it works:** Claude Code's bundled `cli.js` uses the Anthropic SDK, which reads `ANTHROPIC_BASE_URL`. The proxy intercepts `/v1/messages` requests (the model API) and translates them to Ollama's format, while passing everything else (auth, bootstrap, feature flags) through to `api.anthropic.com`.
|
||||||
|
|
||||||
|
**To change the model**, edit the `MODEL` constant near the top of `ollama-proxy.mjs`. Works with any Ollama model that has 128K+ context — `qwen3-coder`, `qwen3.5`, `deepseek-r1:32b`, `llama4`, etc.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
151
ollama-proxy.mjs
Normal file
151
ollama-proxy.mjs
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
// Direct Anthropic-to-Ollama proxy for Claude Code
|
||||||
|
// Routes /v1/messages → Ollama (format translation)
|
||||||
|
// Routes everything else → api.anthropic.com (passthrough)
|
||||||
|
import http from 'http';
|
||||||
|
import https from 'https';
|
||||||
|
|
||||||
|
const OLLAMA = 'http://localhost:11434';
|
||||||
|
const MODEL = 'qwen3-coder:30b';
|
||||||
|
const PORT = 9090;
|
||||||
|
|
||||||
|
function convertAnthropicToOllama(body) {
|
||||||
|
const messages = [];
|
||||||
|
|
||||||
|
// System prompt
|
||||||
|
if (body.system) {
|
||||||
|
const sysText = typeof body.system === 'string'
|
||||||
|
? body.system
|
||||||
|
: body.system.map(b => b.text || '').join('\n');
|
||||||
|
messages.push({ role: 'system', content: sysText });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Messages
|
||||||
|
for (const msg of (body.messages || [])) {
|
||||||
|
let content = '';
|
||||||
|
if (typeof msg.content === 'string') {
|
||||||
|
content = msg.content;
|
||||||
|
} else if (Array.isArray(msg.content)) {
|
||||||
|
content = msg.content.map(b => b.text || '').filter(Boolean).join('\n');
|
||||||
|
}
|
||||||
|
if (content) {
|
||||||
|
messages.push({ role: msg.role, content });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
model: MODEL,
|
||||||
|
messages,
|
||||||
|
stream: false,
|
||||||
|
options: { num_predict: body.max_tokens || 4096 },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function convertOllamaToAnthropic(ollamaRes, requestModel) {
|
||||||
|
const text = ollamaRes.message?.content || '';
|
||||||
|
return {
|
||||||
|
id: 'msg_local_' + Date.now(),
|
||||||
|
type: 'message',
|
||||||
|
role: 'assistant',
|
||||||
|
content: [{ type: 'text', text }],
|
||||||
|
model: requestModel || 'claude-opus-4-6',
|
||||||
|
stop_reason: 'end_turn',
|
||||||
|
stop_sequence: null,
|
||||||
|
usage: {
|
||||||
|
input_tokens: ollamaRes.prompt_eval_count || 0,
|
||||||
|
output_tokens: ollamaRes.eval_count || 0,
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function proxyToAnthropic(req, res) {
|
||||||
|
let body = [];
|
||||||
|
req.on('data', c => body.push(c));
|
||||||
|
req.on('end', () => {
|
||||||
|
const opts = {
|
||||||
|
hostname: 'api.anthropic.com',
|
||||||
|
port: 443,
|
||||||
|
path: req.url,
|
||||||
|
method: req.method,
|
||||||
|
headers: { ...req.headers, host: 'api.anthropic.com' },
|
||||||
|
};
|
||||||
|
const pr = https.request(opts, pr2 => {
|
||||||
|
res.writeHead(pr2.statusCode, pr2.headers);
|
||||||
|
pr2.pipe(res);
|
||||||
|
});
|
||||||
|
pr.on('error', e => { res.writeHead(502); res.end(e.message); });
|
||||||
|
if (body.length) pr.write(Buffer.concat(body));
|
||||||
|
pr.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleMessages(req, res) {
|
||||||
|
let body = [];
|
||||||
|
req.on('data', c => body.push(c));
|
||||||
|
req.on('end', () => {
|
||||||
|
let parsed;
|
||||||
|
try {
|
||||||
|
parsed = JSON.parse(Buffer.concat(body).toString());
|
||||||
|
} catch {
|
||||||
|
res.writeHead(400);
|
||||||
|
res.end('Invalid JSON');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const requestModel = parsed.model;
|
||||||
|
console.log(`\x1b[32m[OLLAMA]\x1b[0m ${req.method} ${req.url} model=${requestModel} stream=${parsed.stream}`);
|
||||||
|
|
||||||
|
// Force non-streaming (simpler translation)
|
||||||
|
const ollamaBody = convertAnthropicToOllama(parsed);
|
||||||
|
const payload = JSON.stringify(ollamaBody);
|
||||||
|
|
||||||
|
const ollamaReq = http.request(
|
||||||
|
`${OLLAMA}/api/chat`,
|
||||||
|
{ method: 'POST', headers: { 'Content-Type': 'application/json' } },
|
||||||
|
ollamaRes => {
|
||||||
|
let data = [];
|
||||||
|
ollamaRes.on('data', c => data.push(c));
|
||||||
|
ollamaRes.on('end', () => {
|
||||||
|
try {
|
||||||
|
const ollamaResult = JSON.parse(Buffer.concat(data).toString());
|
||||||
|
const anthropicResponse = convertOllamaToAnthropic(ollamaResult, requestModel);
|
||||||
|
const respBody = JSON.stringify(anthropicResponse);
|
||||||
|
console.log(`\x1b[32m[OLLAMA]\x1b[0m ← ${ollamaResult.eval_count || '?'} tokens, ${((ollamaResult.total_duration || 0) / 1e9).toFixed(1)}s`);
|
||||||
|
res.writeHead(200, {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Content-Length': Buffer.byteLength(respBody),
|
||||||
|
});
|
||||||
|
res.end(respBody);
|
||||||
|
} catch (e) {
|
||||||
|
console.error('[OLLAMA] Parse error:', e.message);
|
||||||
|
res.writeHead(500);
|
||||||
|
res.end('Ollama response parse error');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
},
|
||||||
|
);
|
||||||
|
ollamaReq.on('error', e => {
|
||||||
|
console.error('[OLLAMA] Connection error:', e.message);
|
||||||
|
res.writeHead(502);
|
||||||
|
res.end('Ollama connection error: ' + e.message);
|
||||||
|
});
|
||||||
|
ollamaReq.write(payload);
|
||||||
|
ollamaReq.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const server = http.createServer((req, res) => {
|
||||||
|
if (req.url?.startsWith('/v1/messages')) {
|
||||||
|
handleMessages(req, res);
|
||||||
|
} else {
|
||||||
|
console.log(`\x1b[33m[ANTHROPIC]\x1b[0m ${req.method} ${req.url}`);
|
||||||
|
proxyToAnthropic(req, res);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
server.listen(PORT, () => {
|
||||||
|
console.log(`\n🔀 Ollama proxy on :${PORT}`);
|
||||||
|
console.log(` /v1/messages → Ollama ${MODEL} (Anthropic format translation)`);
|
||||||
|
console.log(` everything else → api.anthropic.com\n`);
|
||||||
|
});
|
||||||
Loading…
Reference in New Issue
Block a user