From f5756f38afea3d47e39b273e12900e7fc8bf6336 Mon Sep 17 00:00:00 2001
From: wb <wb@aquantico.de>
Date: Wed, 6 May 2026 16:10:01 +0200
Subject: [PATCH] fix(diarization-ui): validate non-empty content before LLM
 call, add OLLAMA_THINK flag

Empty or whitespace-only documents caused the model to spin in its
thinking loop and waste all tokens. The analysis job now raises a clear
error before the Ollama call.
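Also adds the OLLAMA_THINK env var (default: false; 1/true/yes enable
it, case-insensitive), sent as the "think" field of the /api/generate
request to control whether the model uses extended thinking. Disabling
it avoids runaway thinking loops on ambiguous inputs.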

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .env.example | 1 +
 app.py       | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index 147c8f1..4cf5c11 100644
--- a/.env.example
+++ b/.env.example
@@ -2,3 +2,4 @@ API_BASE=http://gx10.aquantico.lan:8093
 OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434
 OLLAMA_MODEL=qwen3.5:9b
 OLLAMA_NUM_PREDICT=4096
+OLLAMA_THINK=false
diff --git a/app.py b/app.py
index 35db195..ef40506 100644
--- a/app.py
+++ b/app.py
@@ -16,6 +16,7 @@ API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
 OLLAMA_NUM_PREDICT = int(os.getenv("OLLAMA_NUM_PREDICT", "4096"))
+OLLAMA_THINK = os.getenv("OLLAMA_THINK", "false").lower() in ("1", "true", "yes")
 DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
 
 app = FastAPI(title="Diarization UI")
@@ -371,6 +372,8 @@ def _process_analysis_job(job_id: int):
             prm = c.execute("SELECT * FROM prompts WHERE id=?", (j["prompt_id"],)).fetchone()
         if not doc or not prm:
             raise RuntimeError("Dokument oder Prompt nicht gefunden")
+        if not (doc["content_md"] or "").strip():
+            raise RuntimeError("Dokument hat keinen Inhalt – bitte zuerst das Transkript prüfen")
 
         user_extra = (j.get("user_prompt") or "").strip()
         llm_prompt = (
@@ -387,7 +390,7 @@ def _process_analysis_job(job_id: int):
 
         r = requests.post(
             f"{OLLAMA_BASE_URL}/api/generate",
-            json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "options": {
+            json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "think": OLLAMA_THINK, "options": {
                 "num_ctx": num_ctx,
                 "num_predict": OLLAMA_NUM_PREDICT,
                 "repeat_penalty": 1.15,