diff --git a/.env.example b/.env.example
index cf68448..147c8f1 100644
--- a/.env.example
+++ b/.env.example
@@ -1,3 +1,4 @@
 API_BASE=http://gx10.aquantico.lan:8093
 OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434
 OLLAMA_MODEL=qwen3.5:9b
+OLLAMA_NUM_PREDICT=4096
diff --git a/app.py b/app.py
index be38f5e..35db195 100644
--- a/app.py
+++ b/app.py
@@ -15,6 +15,7 @@ from fastapi.responses import HTMLResponse, PlainTextResponse, Response, JSONRes
 API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
+OLLAMA_NUM_PREDICT = int(os.getenv("OLLAMA_NUM_PREDICT") or "4096")  # unset or empty -> default 4096
 DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
 
 app = FastAPI(title="Diarization UI")
@@ -386,7 +387,12 @@ def _process_analysis_job(job_id: int):
 
         r = requests.post(
             f"{OLLAMA_BASE_URL}/api/generate",
-            json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "options": {"num_ctx": num_ctx}},
+            json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": True, "options": {
+                "num_ctx": num_ctx,
+                "num_predict": OLLAMA_NUM_PREDICT,
+                "repeat_penalty": 1.15,
+                "repeat_last_n": 128,
+            }},
             stream=True,
             timeout=1200,
         )