Fix AI prompts: switch to OpenAI-compatible /v1/chat/completions API

The server at ai.binarygnome.com returns 405 on POST /api/generate, which indicates that it speaks the OpenAI-compatible API rather than the native Ollama format. Switch to /v1/chat/completions with a messages[] payload and parse the reply from data.choices[0].message.content.

Also add an optional OLLAMA_API_KEY env var for servers that require a Bearer token.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-24 20:27:31 -04:00 · 2026-05-24 20:27:31 -04:00 · 0cae6c6188
commit 0cae6c6188
parent a4a982aed7
2 changed files with 14 additions and 6 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,6 +10,7 @@ services:
      - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
      - OLLAMA_URL=${OLLAMA_URL:-https://ai.binarygnome.com}
      - OLLAMA_MODEL=${OLLAMA_MODEL:-gemma4:latest}
      - OLLAMA_API_KEY=${OLLAMA_API_KEY:-}
    extra_hosts:
      - "host-gateway:host-gateway"
    networks:
--- a/server/routes/prompts.js
+++ b/server/routes/prompts.js
@@ -3,6 +3,7 @@ import { auth } from '../middleware/auth.js'
 const OLLAMA_URL   = process.env.OLLAMA_URL   || 'http://localhost:11434'
 const OLLAMA_MODEL = process.env.OLLAMA_MODEL || 'llama3.2'
 const OLLAMA_KEY   = process.env.OLLAMA_API_KEY || ''   // optional Bearer token
 const FALLBACK = [
  // ── Original 20 ───────────────────────────────────────────
@@ -173,17 +174,23 @@ function nextFallback() {
  return fallbackQueue.pop()
 }
-// ── Shared Ollama call (returns parsed response or throws with detail) ────
+// ── Shared AI call — uses the OpenAI-compatible chat completions API ─────
 async function callOllama() {
-  const url = `${OLLAMA_URL}/api/generate`
+  const url = `${OLLAMA_URL}/v1/chat/completions`
-  console.log(`[prompts] calling Ollama  url=${url}  model=${OLLAMA_MODEL}`)
+  console.log(`[prompts] calling AI  url=${url}  model=${OLLAMA_MODEL}`)
  const headers = { 'Content-Type': 'application/json' }
  if (OLLAMA_KEY) headers['Authorization'] = `Bearer ${OLLAMA_KEY}`
  const r = await fetch(url, {
    method: 'POST',
-    headers: { 'Content-Type': 'application/json' },
+    headers,
    body: JSON.stringify({
      model: OLLAMA_MODEL,
-      prompt: 'Write one creative writing prompt for a young writer aged 10–14. Make it imaginative, specific, and intriguing — a little mysterious or adventurous. Write only the prompt itself: no introduction, no explanation, no quotation marks. Maximum two sentences.',
+      messages: [{
        role: 'user',
        content: 'Write one creative writing prompt for a young writer aged 10–14. Make it imaginative, specific, and intriguing — a little mysterious or adventurous. Write only the prompt itself: no introduction, no explanation, no quotation marks. Maximum two sentences.',
      }],
      stream: false,
    }),
    signal: AbortSignal.timeout(45000),
@@ -195,7 +202,7 @@ async function callOllama() {
  }
  const data = await r.json()
-  const prompt = data.response?.trim()
+  const prompt = data.choices?.[0]?.message?.content?.trim()
  if (!prompt) throw new Error(`empty response — raw: ${JSON.stringify(data).slice(0, 200)}`)
  return prompt
 }