fix(diarization-ui): validate non-empty content before LLM call, add OLLAMA_THINK flag
Empty documents caused the model to spin in its thinking loop and exhaust the entire token budget before producing any output. The job now fails with a clear error before the Ollama call is made. Also adds an OLLAMA_THINK env var (default: false) to control whether the model uses extended thinking; disabling it avoids runaway thinking loops on ambiguous inputs.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
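For reviewers who want to exercise the two changes outside the job runner, here is a minimal, self-contained sketch. The env parsing, the empty-content guard, and the top-level "think" field mirror the diff below; the analyze() wrapper, the num_ctx default, and the stream/timeout arguments are illustrative assumptions, not code from this commit.

import os
import requests

# Defaults mirror the env file in this commit; override via environment as needed.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
OLLAMA_NUM_PREDICT = int(os.getenv("OLLAMA_NUM_PREDICT", "4096"))
# Any of "1", "true", "yes" (case-insensitive) enables extended thinking; everything else disables it.
OLLAMA_THINK = os.getenv("OLLAMA_THINK", "false").lower() in ("1", "true", "yes")

def analyze(content_md: str, llm_prompt: str, num_ctx: int = 8192) -> requests.Response:
    # Guard added by this commit: fail fast instead of handing an empty document to the model.
    if not (content_md or "").strip():
        raise RuntimeError("Dokument hat keinen Inhalt – bitte zuerst das Transkript prüfen")
    # "think" is passed top-level (not inside "options"), matching the call in app.py.
    return requests.post(
        f"{OLLAMA_BASE_URL}/api/generate",
        json={
            "model": OLLAMA_MODEL,
            "prompt": llm_prompt,
            "stream": True,
            "think": OLLAMA_THINK,
            "options": {
                "num_ctx": num_ctx,
                "num_predict": OLLAMA_NUM_PREDICT,
                "repeat_penalty": 1.15,
            },
        },
        stream=True,   # assumption: the real call streams the response line by line
        timeout=600,   # assumption: the real call likely sets some request timeout
    )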
@@ -2,3 +2,4 @@ API_BASE=http://gx10.aquantico.lan:8093
 OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434
 OLLAMA_MODEL=qwen3.5:9b
 OLLAMA_NUM_PREDICT=4096
+OLLAMA_THINK=false
app.py (5 changed lines)
@@ -16,6 +16,7 @@ API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
 OLLAMA_NUM_PREDICT = int(os.getenv("OLLAMA_NUM_PREDICT", "4096"))
+OLLAMA_THINK = os.getenv("OLLAMA_THINK", "false").lower() in ("1", "true", "yes")
 DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
 
 app = FastAPI(title="Diarization UI")
@@ -371,6 +372,8 @@ def _process_analysis_job(job_id: int):
     prm = c.execute("SELECT * FROM prompts WHERE id=?", (j["prompt_id"],)).fetchone()
     if not doc or not prm:
         raise RuntimeError("Dokument oder Prompt nicht gefunden")
+    if not (doc["content_md"] or "").strip():
+        raise RuntimeError("Dokument hat keinen Inhalt – bitte zuerst das Transkript prüfen")
 
     user_extra = (j.get("user_prompt") or "").strip()
     llm_prompt = (
@@ -387,7 +390,7 @@ def _process_analysis_job(job_id: int):
 
     r = requests.post(
         f"{OLLAMA_BASE_URL}/api/generate",
-        json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "options": {
+        json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "think": OLLAMA_THINK, "options": {
             "num_ctx": num_ctx,
             "num_predict": OLLAMA_NUM_PREDICT,
             "repeat_penalty": 1.15,