Fix AI prompts: switch to OpenAI-compatible /v1/chat/completions API

The server at ai.binarygnome.com returns 405 (Method Not Allowed) on
POST /api/generate, which indicates it does not expose the native Ollama
endpoint and speaks the OpenAI-compatible API instead. Switch to
/v1/chat/completions with a messages[] payload and parse the reply from
data.choices[0].message.content.

Also add an optional OLLAMA_API_KEY env var for servers that require a
Bearer token; when set, it is sent as an Authorization: Bearer header.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
chris 2026-05-24 20:27:31 -04:00
parent a4a982aed7
commit 0cae6c6188
2 changed files with 14 additions and 6 deletions

View File

@@ -10,6 +10,7 @@ services:
- ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
- OLLAMA_URL=${OLLAMA_URL:-https://ai.binarygnome.com} - OLLAMA_URL=${OLLAMA_URL:-https://ai.binarygnome.com}
- OLLAMA_MODEL=${OLLAMA_MODEL:-gemma4:latest} - OLLAMA_MODEL=${OLLAMA_MODEL:-gemma4:latest}
- OLLAMA_API_KEY=${OLLAMA_API_KEY:-}
extra_hosts: extra_hosts:
- "host-gateway:host-gateway" - "host-gateway:host-gateway"
networks: networks:

View File

@@ -3,6 +3,7 @@ import { auth } from '../middleware/auth.js'
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434' const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434'
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || 'llama3.2' const OLLAMA_MODEL = process.env.OLLAMA_MODEL || 'llama3.2'
const OLLAMA_KEY = process.env.OLLAMA_API_KEY || '' // optional Bearer token
const FALLBACK = [ const FALLBACK = [
// ── Original 20 ─────────────────────────────────────────── // ── Original 20 ───────────────────────────────────────────
@@ -173,17 +174,23 @@ function nextFallback() {
return fallbackQueue.pop() return fallbackQueue.pop()
} }
// ── Shared Ollama call (returns parsed response or throws with detail) ──── // ── Shared AI call — uses the OpenAI-compatible chat completions API ─────
async function callOllama() { async function callOllama() {
const url = `${OLLAMA_URL}/api/generate` const url = `${OLLAMA_URL}/v1/chat/completions`
console.log(`[prompts] calling Ollama url=${url} model=${OLLAMA_MODEL}`) console.log(`[prompts] calling AI url=${url} model=${OLLAMA_MODEL}`)
const headers = { 'Content-Type': 'application/json' }
if (OLLAMA_KEY) headers['Authorization'] = `Bearer ${OLLAMA_KEY}`
const r = await fetch(url, { const r = await fetch(url, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers,
body: JSON.stringify({ body: JSON.stringify({
model: OLLAMA_MODEL, model: OLLAMA_MODEL,
prompt: 'Write one creative writing prompt for a young writer aged 1014. Make it imaginative, specific, and intriguing — a little mysterious or adventurous. Write only the prompt itself: no introduction, no explanation, no quotation marks. Maximum two sentences.', messages: [{
role: 'user',
content: 'Write one creative writing prompt for a young writer aged 1014. Make it imaginative, specific, and intriguing — a little mysterious or adventurous. Write only the prompt itself: no introduction, no explanation, no quotation marks. Maximum two sentences.',
}],
stream: false, stream: false,
}), }),
signal: AbortSignal.timeout(45000), signal: AbortSignal.timeout(45000),
@@ -195,7 +202,7 @@ async function callOllama() {
} }
const data = await r.json() const data = await r.json()
const prompt = data.response?.trim() const prompt = data.choices?.[0]?.message?.content?.trim()
if (!prompt) throw new Error(`empty response — raw: ${JSON.stringify(data).slice(0, 200)}`) if (!prompt) throw new Error(`empty response — raw: ${JSON.stringify(data).slice(0, 200)}`)
return prompt return prompt
} }