feat(diarization-ui): multi-page app with sidebar, projects, prompt config, db library, markdown download, and qwen analysis runs

This commit is contained in:
2026-03-21 14:02:21 +01:00
parent 25793c022c
commit 119c8b3f7b

438
app.py
View File

@@ -6,14 +6,14 @@ from typing import Optional
import requests import requests
from fastapi import FastAPI, File, Form, HTTPException, UploadFile from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse, PlainTextResponse
API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/") API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/") OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
DB_PATH = os.getenv("DB_PATH", "/data/ui.db") DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
app = FastAPI(title="Diarization UI + LLM") app = FastAPI(title="Diarization UI")
def db(): def db():
@@ -22,32 +22,114 @@ def db():
return conn return conn
def now_iso() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    ``datetime.utcnow()`` is deprecated since Python 3.12, so the time is read
    from an aware UTC clock and the tzinfo is dropped afterwards. Dropping it
    keeps the output format identical to the previous implementation (no
    ``+00:00`` suffix); the value is written to ``created_at``/``updated_at``
    columns, so the format should not change between rows.
    """
    # Local import: only ``datetime`` is known to be imported at file level.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
def init_db(): def init_db():
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True) os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
with db() as c: with db() as c:
c.execute( c.execute(
""" """
CREATE TABLE IF NOT EXISTS transcripts ( CREATE TABLE IF NOT EXISTS projects (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at TEXT NOT NULL, name TEXT UNIQUE NOT NULL,
filename TEXT, created_at TEXT NOT NULL
formatted_text TEXT NOT NULL,
raw_json TEXT NOT NULL
) )
""" """
) )
c.execute( c.execute(
""" """
CREATE TABLE IF NOT EXISTS analyses ( CREATE TABLE IF NOT EXISTS prompts (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
transcript_id INTEGER NOT NULL, name TEXT UNIQUE NOT NULL,
created_at TEXT NOT NULL,
prompt TEXT NOT NULL, prompt TEXT NOT NULL,
answer TEXT NOT NULL, created_at TEXT NOT NULL,
FOREIGN KEY(transcript_id) REFERENCES transcripts(id) updated_at TEXT NOT NULL
) )
""" """
) )
c.execute(
"""
CREATE TABLE IF NOT EXISTS documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project_id INTEGER NOT NULL,
kind TEXT NOT NULL, -- transcript|analysis
title TEXT NOT NULL,
content_md TEXT NOT NULL,
source_document_id INTEGER,
prompt_id INTEGER,
raw_json TEXT,
created_at TEXT NOT NULL,
FOREIGN KEY(project_id) REFERENCES projects(id),
FOREIGN KEY(source_document_id) REFERENCES documents(id),
FOREIGN KEY(prompt_id) REFERENCES prompts(id)
)
"""
)
# defaults
c.execute("INSERT OR IGNORE INTO projects(name, created_at) VALUES (?,?)", ("Default", now_iso()))
c.execute(
"INSERT OR IGNORE INTO prompts(name, prompt, created_at, updated_at) VALUES (?,?,?,?)",
(
"Zusammenfassung",
"Erstelle eine prägnante Zusammenfassung des Gesprächs in Stichpunkten.",
now_iso(),
now_iso(),
),
)
c.execute(
"INSERT OR IGNORE INTO prompts(name, prompt, created_at, updated_at) VALUES (?,?,?,?)",
(
"Aufgaben",
"Extrahiere alle Aufgaben. Gib pro Aufgabe: Verantwortlich, Aufgabe, Deadline (falls vorhanden), Priorität.",
now_iso(),
now_iso(),
),
)
def layout(title: str, body: str) -> str:
    """Wrap an HTML fragment in the shared page shell (sidebar + main area).

    Args:
        title: Plain text for the ``<title>`` element. It is HTML-escaped
            here so that characters such as ``<`` or ``&`` cannot break the
            document head.
        body: HTML fragment inserted verbatim inside ``<main>``. Callers are
            responsible for escaping any dynamic values they embed in it.

    Returns:
        The complete HTML document as a string.
    """
    # Local import keeps this block independent of the file's import section.
    from html import escape

    safe_title = escape(title)
    return f"""
<!doctype html>
<html><head><meta charset='utf-8'><meta name='viewport' content='width=device-width, initial-scale=1'>
<title>{safe_title}</title>
<style>
body{{font-family:Arial;margin:0;display:flex;min-height:100vh}}
nav{{width:240px;background:#111;color:#fff;padding:16px}}
nav a{{display:block;color:#fff;text-decoration:none;padding:8px 10px;border-radius:6px;margin:4px 0}}
nav a:hover{{background:#2a2a2a}}
main{{flex:1;padding:20px;max-width:1200px}}
.card{{border:1px solid #ddd;border-radius:8px;padding:12px;margin:10px 0}}
input,select,textarea,button{{padding:8px;font-size:14px}}
textarea{{width:100%;min-height:140px}}
pre{{white-space:pre-wrap;background:#111;color:#0f0;padding:10px;border-radius:8px}}
.row{{display:flex;gap:8px;flex-wrap:wrap;align-items:center}}
small{{color:#666}}
</style></head>
<body>
<nav>
<h3>Menü</h3>
<a href='/'>Upload</a>
<a href='/library'>Datenbank</a>
<a href='/prompts'>Prompt-Konfig</a>
<a href='/run'>Prompt ausführen</a>
<a href='/healthz'>Health</a>
</nav>
<main>{body}</main>
</body></html>
"""
def get_projects():
    """Return every project row (``id``, ``name``), ordered by name."""
    query = "SELECT id,name FROM projects ORDER BY name"
    with db() as conn:
        rows = conn.execute(query).fetchall()
    return rows
def get_prompts():
    """Return every prompt row (``id``, ``name``, ``prompt``), ordered by name."""
    query = "SELECT id,name,prompt FROM prompts ORDER BY name"
    with db() as conn:
        rows = conn.execute(query).fetchall()
    return rows
@app.on_event("startup") @app.on_event("startup")
@@ -57,155 +139,259 @@ def startup():
@app.get("/healthz") @app.get("/healthz")
def healthz(): def healthz():
return { return {"ok": True, "api_base": API_BASE, "ollama_base_url": OLLAMA_BASE_URL, "ollama_model": OLLAMA_MODEL, "db_path": DB_PATH}
"ok": True,
"api_base": API_BASE,
"ollama_base_url": OLLAMA_BASE_URL,
"ollama_model": OLLAMA_MODEL,
"db_path": DB_PATH,
}
@app.get("/", response_class=HTMLResponse) @app.get("/", response_class=HTMLResponse)
def index(): def upload_page(msg: str = ""):
return """ projects = get_projects()
<!doctype html> opts = "".join([f"<option value='{p['id']}'>{p['name']}</option>" for p in projects])
<html><head><meta charset='utf-8'><meta name='viewport' content='width=device-width, initial-scale=1'> body = f"""
<title>Diarization UI</title> <h2>Audio Upload</h2>
<style>body{font-family:Arial;max-width:1100px;margin:24px auto;padding:0 12px}.row{display:flex;gap:8px;flex-wrap:wrap}button{padding:8px 12px}pre{white-space:pre-wrap;background:#111;color:#0f0;padding:10px;border-radius:8px;min-height:140px}.card{border:1px solid #ddd;border-radius:8px;padding:10px;margin:10px 0}</style> <p>Audio wird transkribiert + mit Sprechern angereichert und als Dokument gespeichert.</p>
</head> {f"<p><b>{msg}</b></p>" if msg else ""}
<body> <form action='/upload' method='post' enctype='multipart/form-data' class='card'>
<h2>Upload -> Transcribe + Diarize -> speichern -> LLM Analyse</h2> <div class='row'>
<div class='row'> <label>Projekt:</label>
<input id='f' type='file' accept='audio/*'> <select name='project_id'>{opts}</select>
<button onclick='processFile()'>Verarbeiten</button> <input name='title' placeholder='Titel (optional)'>
</div> </div>
<p id='status'></p> <div class='row' style='margin-top:8px'>
<pre id='out'></pre> <input type='file' name='file' accept='audio/*' required>
<button type='submit'>Verarbeiten & speichern</button>
<h3>Analyse</h3> </div>
<div class='row'> </form>
<input id='tid' type='number' placeholder='transcript_id'>
<input id='prompt' style='width:500px' placeholder='z.B. Fasse zusammen und extrahiere Aufgaben mit Verantwortlichen.'>
<button onclick='analyze()'>Mit Qwen analysieren</button>
</div>
<pre id='analysis'></pre>
<h3>Gespeicherte Transkripte</h3>
<button onclick='loadTranscripts()'>Neu laden</button>
<div id='list'></div>
<script>
async function processFile(){
const fi=document.getElementById('f');
if(!fi.files.length){alert('Datei wählen');return;}
const fd=new FormData(); fd.append('file',fi.files[0]);
document.getElementById('status').textContent='Läuft...';
const r=await fetch('/process',{method:'POST',body:fd});
const j=await r.json();
document.getElementById('status').textContent = r.ok ? `OK transcript_id=${j.transcript_id}` : `Fehler ${r.status}`;
document.getElementById('out').textContent = JSON.stringify(j,null,2);
if(j.transcript_id){document.getElementById('tid').value=j.transcript_id;}
loadTranscripts();
}
async function analyze(){
const transcript_id=parseInt(document.getElementById('tid').value||'0');
const prompt=document.getElementById('prompt').value;
const r=await fetch('/analyze',{method:'POST',headers:{'Content-Type':'application/x-www-form-urlencoded'},body:new URLSearchParams({transcript_id,prompt})});
const j=await r.json();
document.getElementById('analysis').textContent = JSON.stringify(j,null,2);
}
async function loadTranscripts(){
const r=await fetch('/transcripts');
const j=await r.json();
const root=document.getElementById('list');
root.innerHTML='';
for(const t of j.items){
const d=document.createElement('div'); d.className='card';
d.innerHTML=`<b>#${t.id}</b> ${t.created_at} ${t.filename||''}<br><pre>${(t.formatted_text||'').slice(0,1200)}</pre>`;
root.appendChild(d);
}
}
loadTranscripts();
</script>
</body></html>
""" """
return layout("Upload", body)
@app.post("/process") @app.post("/projects", response_class=HTMLResponse)
async def process(file: UploadFile = File(...)): def add_project(name: str = Form(...)):
with db() as c:
c.execute("INSERT INTO projects(name, created_at) VALUES (?,?)", (name.strip(), now_iso()))
return HTMLResponse("<meta http-equiv='refresh' content='0; url=/prompts'>")
@app.post("/upload", response_class=HTMLResponse)
async def upload(project_id: int = Form(...), title: str = Form(""), file: UploadFile = File(...)):
data = await file.read() data = await file.read()
if not data: if not data:
raise HTTPException(400, "empty file") raise HTTPException(400, "Leere Datei")
files = {"file": (file.filename or "audio.bin", data, file.content_type or "application/octet-stream")} files = {"file": (file.filename or "audio.bin", data, file.content_type or "application/octet-stream")}
try:
r = requests.post(f"{API_BASE}/transcribe-diarize", files=files, timeout=1800) r = requests.post(f"{API_BASE}/transcribe-diarize", files=files, timeout=1800)
except Exception as e:
raise HTTPException(502, f"API unreachable: {e}")
if r.status_code >= 400: if r.status_code >= 400:
raise HTTPException(r.status_code, r.text) raise HTTPException(r.status_code, r.text)
payload = r.json() payload = r.json()
formatted = payload.get("formatted_text", "") content_md = payload.get("formatted_text", "")
doc_title = (title or "").strip() or (file.filename or "Transkript")
with db() as c: with db() as c:
cur = c.execute( cur = c.execute(
"INSERT INTO transcripts(created_at, filename, formatted_text, raw_json) VALUES (?,?,?,?)", "INSERT INTO documents(project_id, kind, title, content_md, raw_json, created_at) VALUES (?,?,?,?,?,?)",
(datetime.utcnow().isoformat(), file.filename, formatted, json.dumps(payload, ensure_ascii=False)), (project_id, "transcript", doc_title, content_md, json.dumps(payload, ensure_ascii=False), now_iso()),
) )
transcript_id = cur.lastrowid doc_id = cur.lastrowid
return {"ok": True, "transcript_id": transcript_id, **payload} return HTMLResponse(f"<meta http-equiv='refresh' content='0; url=/document/{doc_id}'>")
@app.get("/transcripts") @app.get("/library", response_class=HTMLResponse)
def transcripts(limit: int = 20): def library(project_id: Optional[int] = None):
with db() as c: with db() as c:
rows = c.execute( projects = c.execute("SELECT id,name FROM projects ORDER BY name").fetchall()
"SELECT id, created_at, filename, formatted_text FROM transcripts ORDER BY id DESC LIMIT ?", if project_id:
(limit,), docs = c.execute(
"""
SELECT d.id,d.kind,d.title,d.created_at,p.name AS project
FROM documents d JOIN projects p ON p.id=d.project_id
WHERE d.project_id=? ORDER BY d.id DESC
""",
(project_id,),
).fetchall() ).fetchall()
return {"items": [dict(r) for r in rows]} else:
docs = c.execute(
"""
SELECT d.id,d.kind,d.title,d.created_at,p.name AS project
FROM documents d JOIN projects p ON p.id=d.project_id
ORDER BY d.id DESC LIMIT 200
"""
).fetchall()
p_opts = "<option value=''>Alle</option>" + "".join(
[f"<option value='{p['id']}' {'selected' if project_id==p['id'] else ''}>{p['name']}</option>" for p in projects]
)
items = "".join(
[
f"<div class='card'><b>#{d['id']}</b> [{d['kind']}] {d['title']}<br><small>{d['project']} · {d['created_at']}</small><br>"
f"<a href='/document/{d['id']}'>Ansehen</a> | <a href='/document/{d['id']}.md'>Download .md</a></div>"
for d in docs
]
)
body = f"""
<h2>Datenbank / Dokumente</h2>
<form method='get' class='row card'>
<label>Projekt:</label>
<select name='project_id'>{p_opts}</select>
<button type='submit'>Filtern</button>
</form>
{items or '<p>Keine Einträge.</p>'}
"""
return layout("Library", body)
@app.post("/analyze") @app.get("/document/{doc_id}", response_class=HTMLResponse)
def analyze(transcript_id: int = Form(...), prompt: str = Form(...)): def view_document(doc_id: int):
with db() as c: with db() as c:
row = c.execute("SELECT formatted_text FROM transcripts WHERE id=?", (transcript_id,)).fetchone() d = c.execute(
if not row: """
raise HTTPException(404, "transcript not found") SELECT d.*, p.name AS project, pr.name AS prompt_name
FROM documents d
JOIN projects p ON p.id=d.project_id
LEFT JOIN prompts pr ON pr.id=d.prompt_id
WHERE d.id=?
""",
(doc_id,),
).fetchone()
if not d:
raise HTTPException(404, "not found")
body = f"""
<h2>Dokument #{d['id']} {d['title']}</h2>
<p><small>Projekt: {d['project']} · Typ: {d['kind']} · {d['created_at']}</small></p>
<p><a href='/document/{doc_id}.md'>Download .md</a></p>
<pre>{(d['content_md'] or '').replace('<','&lt;')}</pre>
"""
return layout("Dokument", body)
@app.get("/document/{doc_id}.md", response_class=PlainTextResponse)
def download_md(doc_id: int):
    """Serve one document's Markdown content as a file download.

    Args:
        doc_id: Primary key of the row in ``documents``.

    Raises:
        HTTPException: 404 when no document with that id exists.
    """
    # NOTE(review): the route "/document/{doc_id}" is registered earlier in
    # this file. Its untyped path segment presumably also matches "5.md", so
    # requests may end in a validation error there instead of reaching this
    # handler - confirm, and if so register this route first or constrain the
    # other one.
    with db() as conn:
        record = conn.execute(
            "SELECT title,content_md FROM documents WHERE id=?", (doc_id,)
        ).fetchone()
    if not record:
        raise HTTPException(404, "not found")
    disposition = f"attachment; filename=document_{doc_id}.md"
    return PlainTextResponse(
        record["content_md"],
        headers={"Content-Disposition": disposition},
    )
@app.get("/prompts", response_class=HTMLResponse)
def prompts_page():
    """Render the prompt configuration page.

    The page contains a form to add a prompt, a form to add a project (with
    the existing project names offered as suggestions) and one editable card
    per stored prompt.

    Prompt names/texts and project names are user-entered, so every value is
    HTML-escaped (including quotes) before it is placed into element content
    or attribute values. Without this, a name containing ``'`` truncates the
    ``value`` attribute, a text containing ``</textarea>`` breaks the form, and
    text containing entities such as ``&amp;`` is silently altered when the
    form is saved again.
    """
    # Local import keeps this block independent of the file's import section.
    from html import escape

    with db() as c:
        prompts = c.execute("SELECT * FROM prompts ORDER BY name").fetchall()
        projects = c.execute("SELECT id,name FROM projects ORDER BY name").fetchall()

    cards = []
    for p in prompts:
        name = escape(p["name"] or "", quote=True)
        text = escape(p["prompt"] or "", quote=True)
        cards.append(
            f"<div class='card'><b>{name}</b><pre>{text}</pre>"
            f"<form method='post' action='/prompts/update'>"
            f"<input type='hidden' name='id' value='{p['id']}'>"
            f"<input name='name' value='{name}'><br>"
            f"<textarea name='prompt'>{text}</textarea><br>"
            f"<button>Speichern</button></form></div>"
        )
    p_list = "".join(cards)

    project_names = [escape(p["name"] or "", quote=True) for p in projects]
    project_opts = "".join(
        f"<option value='{name}'>{name}</option>" for name in project_names
    )

    body = f"""
<h2>Prompt-Konfiguration</h2>
<div class='card'>
<form method='post' action='/prompts/add'>
<h4>Neuer Prompt</h4>
<input name='name' placeholder='Name' required>
<br><textarea name='prompt' placeholder='Prompttext' required></textarea>
<br><button type='submit'>Anlegen</button>
</form>
</div>
<div class='card'>
<form method='post' action='/projects'>
<h4>Neues Projekt</h4>
<input name='name' list='projectNames' placeholder='Projektname' required>
<datalist id='projectNames'>{project_opts}</datalist>
<button type='submit'>Anlegen</button>
</form>
</div>
{p_list}
"""
    return layout("Prompts", body)
@app.post("/prompts/add", response_class=HTMLResponse)
def prompt_add(name: str = Form(...), prompt: str = Form(...)):
    """Create a new prompt and send the browser back to ``/prompts``.

    Args:
        name: Display name of the prompt; surrounding whitespace is removed.
        prompt: Prompt text; surrounding whitespace is removed.

    Raises:
        HTTPException: 400 when name or prompt is blank after stripping
            (the ``required`` attribute on the form is client-side only);
            409 when the name collides with the UNIQUE constraint on
            ``prompts.name``.
    """
    # Local import: assumes db() hands out a sqlite3 connection (the SQL in
    # this file is SQLite-flavoured) - TODO confirm.
    import sqlite3

    clean_name = name.strip()
    clean_prompt = prompt.strip()
    if not clean_name or not clean_prompt:
        raise HTTPException(400, "Name und Prompttext dürfen nicht leer sein")

    stamp = now_iso()  # one timestamp so created_at == updated_at on insert
    try:
        with db() as c:
            c.execute(
                "INSERT INTO prompts(name,prompt,created_at,updated_at) VALUES (?,?,?,?)",
                (clean_name, clean_prompt, stamp, stamp),
            )
    except sqlite3.IntegrityError as exc:
        # Duplicate name: report a conflict instead of an unhandled 500.
        raise HTTPException(409, f"Prompt '{clean_name}' existiert bereits") from exc
    return HTMLResponse("<meta http-equiv='refresh' content='0; url=/prompts'>")
@app.post("/prompts/update", response_class=HTMLResponse)
def prompt_update(id: int = Form(...), name: str = Form(...), prompt: str = Form(...)):
    """Update name and text of an existing prompt, then return to ``/prompts``.

    Args:
        id: Primary key of the prompt. The parameter name shadows the builtin
            but must stay as is because it is the posted form field name.
        name: New display name; surrounding whitespace is removed.
        prompt: New prompt text; surrounding whitespace is removed.

    Raises:
        HTTPException: 400 when name or prompt is blank after stripping;
            404 when no prompt has the given id; 409 when the new name
            collides with the UNIQUE constraint on ``prompts.name``.
    """
    # Local import: assumes db() hands out a sqlite3 connection (the SQL in
    # this file is SQLite-flavoured) - TODO confirm.
    import sqlite3

    clean_name = name.strip()
    clean_prompt = prompt.strip()
    if not clean_name or not clean_prompt:
        raise HTTPException(400, "Name und Prompttext dürfen nicht leer sein")

    try:
        with db() as c:
            cur = c.execute(
                "UPDATE prompts SET name=?, prompt=?, updated_at=? WHERE id=?",
                (clean_name, clean_prompt, now_iso(), id),
            )
            updated = cur.rowcount
    except sqlite3.IntegrityError as exc:
        # Renaming onto an existing name: report a conflict, not a 500.
        raise HTTPException(409, f"Prompt '{clean_name}' existiert bereits") from exc
    if updated == 0:
        # Previously an unknown id was a silent no-op followed by a redirect.
        raise HTTPException(404, "Prompt nicht gefunden")
    return HTMLResponse("<meta http-equiv='refresh' content='0; url=/prompts'>")
@app.get("/run", response_class=HTMLResponse)
def run_page():
    """Render the form for running a stored prompt against a stored document.

    Lists the 200 most recent documents and all prompts as ``<select>``
    options. Document titles and prompt names are user-supplied, so they are
    HTML-escaped before being placed into the option labels; otherwise a title
    containing ``<`` or ``&`` would corrupt the markup.
    """
    # Local import keeps this block independent of the file's import section.
    from html import escape

    with db() as c:
        docs = c.execute(
            "SELECT id,title,kind,created_at FROM documents ORDER BY id DESC LIMIT 200"
        ).fetchall()
        prompts = c.execute("SELECT id,name FROM prompts ORDER BY name").fetchall()

    d_opts = "".join(
        f"<option value='{d['id']}'>#{d['id']} [{escape(d['kind'] or '')}] "
        f"{escape(d['title'] or '')}</option>"
        for d in docs
    )
    p_opts = "".join(
        f"<option value='{p['id']}'>{escape(p['name'] or '')}</option>"
        for p in prompts
    )

    body = f"""
<h2>Prompt ausführen</h2>
<form method='post' action='/run' class='card'>
<label>Dokument:</label><br>
<select name='document_id' style='width:100%'>{d_opts}</select><br><br>
<label>Prompt:</label><br>
<select name='prompt_id' style='width:100%'>{p_opts}</select><br><br>
<button type='submit'>Ausführen (Qwen)</button>
</form>
"""
    return layout("Run", body)
@app.post("/run", response_class=HTMLResponse)
def run_prompt(document_id: int = Form(...), prompt_id: int = Form(...)):
with db() as c:
doc = c.execute("SELECT * FROM documents WHERE id=?", (document_id,)).fetchone()
prm = c.execute("SELECT * FROM prompts WHERE id=?", (prompt_id,)).fetchone()
if not doc or not prm:
raise HTTPException(404, "Dokument oder Prompt nicht gefunden")
transcript_text = row[0]
llm_prompt = ( llm_prompt = (
"Du bist ein Meeting-Analyst. Arbeite auf Deutsch.\n" "Du bist ein präziser Assistent. Antworte auf Deutsch.\n"
"Erzeuge präzise Ausgabe für den folgenden Auftrag.\n\n" f"AUFTRAG:\n{prm['prompt']}\n\n"
f"AUFTRAG:\n{prompt}\n\n" f"TEXT:\n{doc['content_md']}\n"
f"TRANSKRIPT:\n{transcript_text}\n"
) )
body = { r = requests.post(
"model": OLLAMA_MODEL, f"{OLLAMA_BASE_URL}/api/generate",
"prompt": llm_prompt, json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": False},
"stream": False, timeout=1200,
} )
try:
r = requests.post(f"{OLLAMA_BASE_URL}/api/generate", json=body, timeout=600)
except Exception as e:
raise HTTPException(502, f"Ollama unreachable: {e}")
if r.status_code >= 400: if r.status_code >= 400:
raise HTTPException(r.status_code, r.text) raise HTTPException(r.status_code, r.text)
answer = r.json().get("response", "")
j = r.json()
answer = j.get("response", "")
with db() as c: with db() as c:
cur = c.execute( cur = c.execute(
"INSERT INTO analyses(transcript_id, created_at, prompt, answer) VALUES (?,?,?,?)", """
(transcript_id, datetime.utcnow().isoformat(), prompt, answer), INSERT INTO documents(project_id, kind, title, content_md, source_document_id, prompt_id, raw_json, created_at)
VALUES (?,?,?,?,?,?,?,?)
""",
(
doc["project_id"],
"analysis",
f"Analyse: {prm['name']} · {doc['title']}",
answer,
doc["id"],
prm["id"],
json.dumps({"ollama_response": r.json()}, ensure_ascii=False),
now_iso(),
),
) )
analysis_id = cur.lastrowid new_id = cur.lastrowid
return {"ok": True, "analysis_id": analysis_id, "answer": answer} return HTMLResponse(f"<meta http-equiv='refresh' content='0; url=/document/{new_id}'>")