From 4f651fac1274a2ab85ffa2adc926d69e5a8aa36f Mon Sep 17 00:00:00 2001
From: wb
Date: Wed, 6 May 2026 15:50:40 +0200
Subject: [PATCH] fix(diarization-ui): capture thinking tokens in debug stream
 (Qwen3)

Ollama streaming chunks for thinking models put the reasoning tokens in a
separate "thinking" field. Previously only "response" was captured, leaving
the debug window empty while the model reasoned. Now both fields are tracked
independently: thinking is shown in blue above the final answer, and both are
persisted (thinking to the new llm_thinking column, the answer to the
existing llm_response column).

Co-Authored-By: Claude Sonnet 4.6
---
 app.py | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/app.py b/app.py
index e4ac8fd..7b11f45 100644
--- a/app.py
+++ b/app.py
@@ -110,6 +110,10 @@ def init_db():
         c.execute("ALTER TABLE jobs ADD COLUMN llm_response TEXT")
     except Exception:
         pass
+    try:
+        c.execute("ALTER TABLE jobs ADD COLUMN llm_thinking TEXT")
+    except Exception:
+        pass
 
     # defaults
     c.execute("INSERT OR IGNORE INTO projects(name, created_at) VALUES (?,?)", ("Default", now_iso()))
@@ -369,7 +373,7 @@ def _process_analysis_job(job_id: int):
         _job_set(job_id, llm_prompt=llm_prompt)
 
         with _JOB_STREAM_LOCK:
-            _JOB_STREAMS[job_id] = ""
+            _JOB_STREAMS[job_id] = {"thinking": "", "response": ""}
 
         r = requests.post(
             f"{OLLAMA_BASE_URL}/api/generate",
@@ -379,16 +383,18 @@ def _process_analysis_job(job_id: int):
         )
         r.raise_for_status()
 
-        accumulated = ""
+        acc_thinking = ""
+        acc_response = ""
         ollama_final = {}
         chunk_count = 0
         for line in r.iter_lines():
             if not line:
                 continue
             chunk = json.loads(line)
-            accumulated += chunk.get("response", "")
+            acc_thinking += chunk.get("thinking") or ""
+            acc_response += chunk.get("response") or ""
             with _JOB_STREAM_LOCK:
-                _JOB_STREAMS[job_id] = accumulated
+                _JOB_STREAMS[job_id] = {"thinking": acc_thinking, "response": acc_response}
             chunk_count += 1
             if chunk_count % 100 == 0:
                 j_check = _job_get(job_id)
@@ -398,7 +404,7 @@
                 ollama_final = chunk
                 break
 
-        answer = accumulated
+        answer = acc_response
 
         j = _job_get(job_id)
         if not j or j["status"] == "cancelled":
@@ -423,7 +429,7 @@
         )
         new_doc_id = cur.lastrowid
 
-        _job_set(job_id, status="done", result_document_id=new_doc_id, finished_at=now_iso(), llm_response=answer)
+        _job_set(job_id, status="done", result_document_id=new_doc_id, finished_at=now_iso(), llm_response=answer, llm_thinking=acc_thinking or None)
     except Exception as e:
         _job_set(job_id, status="error", error=str(e), finished_at=now_iso())
     finally:
@@ -1302,12 +1308,13 @@ def job_debug_data(job_id: int):
     if not j:
         raise HTTPException(404, "job not found")
     with _JOB_STREAM_LOCK:
-        live = _JOB_STREAMS.get(job_id)
+        live = dict(_JOB_STREAMS[job_id]) if job_id in _JOB_STREAMS else None
     return {
         "status": j["status"],
         "kind": j["kind"],
         "llm_prompt": j.get("llm_prompt"),
-        "llm_response": live if live is not None else j.get("llm_response"),
+        "llm_thinking": live["thinking"] if live is not None else j.get("llm_thinking"),
+        "llm_response": live["response"] if live is not None else j.get("llm_response"),
         "streaming": live is not None,
     }
 
@@ -1530,7 +1537,16 @@ async function refreshDebug() {{
   document.getElementById('debug-prompt-content').textContent = d.llm_prompt || '(kein Prompt gespeichert)';
   const respEl = document.getElementById('debug-response-content');
   const atBottom = respEl.scrollHeight - respEl.scrollTop <= respEl.clientHeight + 20;
-  respEl.textContent = d.llm_response || '(noch keine Antwort)';
+  const thinking = d.llm_thinking || '';
+  const response = d.llm_response || '';
+  if (thinking) {{
+    respEl.innerHTML =
+      `<span class="text-primary">▶ THINKING\n` +
+      `${{thinking.replace(/&/g, '&amp;').replace(/</g, '&lt;')}}</span>` +
+      (response ? `\n\n▶ ANTWORT\n${{response.replace(/&/g, '&amp;').replace(/</g, '&lt;')}}` : '');
+  }} else {{
+    respEl.textContent = response || '(noch keine Antwort)';
+  }}
   if (atBottom) respEl.scrollTop = respEl.scrollHeight;
   const badge = document.getElementById('debug-streaming-badge');
   badge.classList.toggle('d-none', !d.streaming);
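
Note for reviewers (not part of the patch): below is a minimal, standalone sketch of the chunk handling that the -379 hunk relies on, handy for poking at the stream outside app.py. The base URL, model name ("qwen3"), timeout and test prompt are illustrative assumptions, not values taken from this repo; thinking-capable models interleave "thinking" and "response" fragments until a final chunk arrives with "done": true.

    # Standalone sketch -- not part of app.py. Assumes a local Ollama serving
    # a thinking-capable model; adjust URL, model and timeout to your setup.
    import json
    import requests

    OLLAMA_BASE_URL = "http://localhost:11434"  # assumption: default Ollama port
    OLLAMA_MODEL = "qwen3"                      # assumption: any thinking-capable model

    def stream_generate(prompt: str) -> tuple[str, str]:
        """Accumulate "thinking" and "response" separately, as the patch does."""
        r = requests.post(
            f"{OLLAMA_BASE_URL}/api/generate",
            json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": True},
            stream=True,
            timeout=600,
        )
        r.raise_for_status()
        thinking, response = "", ""
        for line in r.iter_lines():
            if not line:
                continue
            chunk = json.loads(line)
            # Thinking models emit reasoning fragments in "thinking" and the
            # final answer in "response"; plain models only ever fill "response".
            thinking += chunk.get("thinking") or ""
            response += chunk.get("response") or ""
            if chunk.get("done"):
                break
        return thinking, response

    if __name__ == "__main__":
        t, a = stream_generate("Warum ist der Himmel blau?")
        print("THINKING:\n" + t + "\n\nANTWORT:\n" + a)

Run against a non-thinking model the first element simply stays empty, which is why the patch stores llm_thinking as acc_thinking or None.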