fix(diarization-ui): prevent repetition loops in Ollama generation
Adds repeat_penalty=1.15 and repeat_last_n=128 to suppress token repetition loops (e.g. "tragen" -> "tragen" -> ...). Also caps output via num_predict (default 4096, configurable via OLLAMA_NUM_PREDICT env var) as a hard stop in case the model still gets stuck.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
API_BASE=http://gx10.aquantico.lan:8093
|
API_BASE=http://gx10.aquantico.lan:8093
|
||||||
OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434
|
OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434
|
||||||
OLLAMA_MODEL=qwen3.5:9b
|
OLLAMA_MODEL=qwen3.5:9b
|
||||||
|
OLLAMA_NUM_PREDICT=4096
|
||||||
|
|||||||
8
app.py
8
app.py
@@ -15,6 +15,7 @@ from fastapi.responses import HTMLResponse, PlainTextResponse, Response, JSONRes
|
|||||||
API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
|
API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
|
||||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
|
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
|
||||||
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
|
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
|
||||||
|
OLLAMA_NUM_PREDICT = int(os.getenv("OLLAMA_NUM_PREDICT", "4096"))
|
||||||
DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
|
DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
|
||||||
|
|
||||||
app = FastAPI(title="Diarization UI")
|
app = FastAPI(title="Diarization UI")
|
||||||
@@ -386,7 +387,12 @@ def _process_analysis_job(job_id: int):
|
|||||||
|
|
||||||
r = requests.post(
|
r = requests.post(
|
||||||
f"{OLLAMA_BASE_URL}/api/generate",
|
f"{OLLAMA_BASE_URL}/api/generate",
|
||||||
json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "options": {"num_ctx": num_ctx}},
|
json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "options": {
|
||||||
|
"num_ctx": num_ctx,
|
||||||
|
"num_predict": OLLAMA_NUM_PREDICT,
|
||||||
|
"repeat_penalty": 1.15,
|
||||||
|
"repeat_last_n": 128,
|
||||||
|
}},
|
||||||
stream=True,
|
stream=True,
|
||||||
timeout=1200,
|
timeout=1200,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user