feat(diarization-ui): dynamic num_ctx for Ollama based on prompt size
Estimates the required context window from the prompt length (chars/3 + 2048-token response buffer) and rounds up to the nearest fixed tier (4096, 8192, 16384, 32768, 65536); estimates above the largest tier are capped at 65536. Fixed tiers prevent Ollama from reloading the model on every call. The chosen num_ctx is prepended to the stored llm_prompt so it's visible in the debug window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
13
app.py
13
app.py
@@ -36,6 +36,14 @@ def now_iso() -> str:
|
|||||||
return datetime.utcnow().isoformat()
|
return datetime.utcnow().isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def _estimate_num_ctx(prompt: str) -> int:
|
||||||
|
needed = len(prompt) // 3 + 2048 # rough token estimate + response buffer
|
||||||
|
for ctx in (4096, 8192, 16384, 32768, 65536):
|
||||||
|
if needed <= ctx:
|
||||||
|
return ctx
|
||||||
|
return 65536
|
||||||
|
|
||||||
|
|
||||||
def init_db():
|
def init_db():
|
||||||
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
|
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
|
||||||
with db() as c:
|
with db() as c:
|
||||||
@@ -371,13 +379,14 @@ def _process_analysis_job(job_id: int):
|
|||||||
+ f"\\nTEXT:\\n{doc['content_md']}\\n"
|
+ f"\\nTEXT:\\n{doc['content_md']}\\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
_job_set(job_id, llm_prompt=llm_prompt)
|
num_ctx = _estimate_num_ctx(llm_prompt)
|
||||||
|
_job_set(job_id, llm_prompt=f"[num_ctx={num_ctx}]\n\n{llm_prompt}")
|
||||||
with _JOB_STREAM_LOCK:
|
with _JOB_STREAM_LOCK:
|
||||||
_JOB_STREAMS[job_id] = {"thinking": "", "response": ""}
|
_JOB_STREAMS[job_id] = {"thinking": "", "response": ""}
|
||||||
|
|
||||||
r = requests.post(
|
r = requests.post(
|
||||||
f"{OLLAMA_BASE_URL}/api/generate",
|
f"{OLLAMA_BASE_URL}/api/generate",
|
||||||
json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True},
|
json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "options": {"num_ctx": num_ctx}},
|
||||||
stream=True,
|
stream=True,
|
||||||
timeout=1200,
|
timeout=1200,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user