diff --git a/app.py b/app.py index c391eda..4fe9860 100644 --- a/app.py +++ b/app.py @@ -6,14 +6,14 @@ from typing import Optional import requests from fastapi import FastAPI, File, Form, HTTPException, UploadFile -from fastapi.responses import HTMLResponse +from fastapi.responses import HTMLResponse, PlainTextResponse API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/") OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b") DB_PATH = os.getenv("DB_PATH", "/data/ui.db") -app = FastAPI(title="Diarization UI + LLM") +app = FastAPI(title="Diarization UI") def db(): @@ -22,32 +22,114 @@ def db(): return conn +def now_iso() -> str: + return datetime.utcnow().isoformat() + + def init_db(): os.makedirs(os.path.dirname(DB_PATH), exist_ok=True) with db() as c: c.execute( """ - CREATE TABLE IF NOT EXISTS transcripts ( + CREATE TABLE IF NOT EXISTS projects ( id INTEGER PRIMARY KEY AUTOINCREMENT, - created_at TEXT NOT NULL, - filename TEXT, - formatted_text TEXT NOT NULL, - raw_json TEXT NOT NULL + name TEXT UNIQUE NOT NULL, + created_at TEXT NOT NULL ) """ ) c.execute( """ - CREATE TABLE IF NOT EXISTS analyses ( + CREATE TABLE IF NOT EXISTS prompts ( id INTEGER PRIMARY KEY AUTOINCREMENT, - transcript_id INTEGER NOT NULL, - created_at TEXT NOT NULL, + name TEXT UNIQUE NOT NULL, prompt TEXT NOT NULL, - answer TEXT NOT NULL, - FOREIGN KEY(transcript_id) REFERENCES transcripts(id) + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL ) """ ) + c.execute( + """ + CREATE TABLE IF NOT EXISTS documents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + kind TEXT NOT NULL, -- transcript|analysis + title TEXT NOT NULL, + content_md TEXT NOT NULL, + source_document_id INTEGER, + prompt_id INTEGER, + raw_json TEXT, + created_at TEXT NOT NULL, + FOREIGN KEY(project_id) REFERENCES projects(id), + FOREIGN KEY(source_document_id) REFERENCES documents(id), + FOREIGN KEY(prompt_id) REFERENCES prompts(id) + ) + """ + ) + + # defaults + c.execute("INSERT OR IGNORE INTO projects(name, created_at) VALUES (?,?)", ("Default", now_iso())) + c.execute( + "INSERT OR IGNORE INTO prompts(name, prompt, created_at, updated_at) VALUES (?,?,?,?)", + ( + "Zusammenfassung", + "Erstelle eine prägnante Zusammenfassung des Gesprächs in Stichpunkten.", + now_iso(), + now_iso(), + ), + ) + c.execute( + "INSERT OR IGNORE INTO prompts(name, prompt, created_at, updated_at) VALUES (?,?,?,?)", + ( + "Aufgaben", + "Extrahiere alle Aufgaben. Gib pro Aufgabe: Verantwortlich, Aufgabe, Deadline (falls vorhanden), Priorität.", + now_iso(), + now_iso(), + ), + ) + + +def layout(title: str, body: str) -> str: + return f""" + + +{title} + + + +
{body}
+ +""" + + +def get_projects(): + with db() as c: + return c.execute("SELECT id,name FROM projects ORDER BY name").fetchall() + + +def get_prompts(): + with db() as c: + return c.execute("SELECT id,name,prompt FROM prompts ORDER BY name").fetchall() @app.on_event("startup") @@ -57,155 +139,259 @@ def startup(): @app.get("/healthz") def healthz(): - return { - "ok": True, - "api_base": API_BASE, - "ollama_base_url": OLLAMA_BASE_URL, - "ollama_model": OLLAMA_MODEL, - "db_path": DB_PATH, - } + return {"ok": True, "api_base": API_BASE, "ollama_base_url": OLLAMA_BASE_URL, "ollama_model": OLLAMA_MODEL, "db_path": DB_PATH} @app.get("/", response_class=HTMLResponse) -def index(): - return """ - - -Diarization UI - - - -

Upload -> Transcribe + Diarize -> speichern -> LLM Analyse

-
- - -
-

-

-
-

Analyse

-
- - - -
-

-
-

Gespeicherte Transkripte

- -
- - - +def upload_page(msg: str = ""): + projects = get_projects() + opts = "".join([f"" for p in projects]) + body = f""" +

Audio Upload

+

Audio wird transkribiert + mit Sprechern angereichert und als Dokument gespeichert.

+{f"

{msg}

" if msg else ""} +
+
+ + + +
+
+ + +
+
""" + return layout("Upload", body) -@app.post("/process") -async def process(file: UploadFile = File(...)): +@app.post("/projects", response_class=HTMLResponse) +def add_project(name: str = Form(...)): + with db() as c: + c.execute("INSERT INTO projects(name, created_at) VALUES (?,?)", (name.strip(), now_iso())) + return HTMLResponse("") + + +@app.post("/upload", response_class=HTMLResponse) +async def upload(project_id: int = Form(...), title: str = Form(""), file: UploadFile = File(...)): data = await file.read() if not data: - raise HTTPException(400, "empty file") + raise HTTPException(400, "Leere Datei") files = {"file": (file.filename or "audio.bin", data, file.content_type or "application/octet-stream")} - try: - r = requests.post(f"{API_BASE}/transcribe-diarize", files=files, timeout=1800) - except Exception as e: - raise HTTPException(502, f"API unreachable: {e}") - + r = requests.post(f"{API_BASE}/transcribe-diarize", files=files, timeout=1800) if r.status_code >= 400: raise HTTPException(r.status_code, r.text) payload = r.json() - formatted = payload.get("formatted_text", "") + content_md = payload.get("formatted_text", "") + doc_title = (title or "").strip() or (file.filename or "Transkript") with db() as c: cur = c.execute( - "INSERT INTO transcripts(created_at, filename, formatted_text, raw_json) VALUES (?,?,?,?)", - (datetime.utcnow().isoformat(), file.filename, formatted, json.dumps(payload, ensure_ascii=False)), + "INSERT INTO documents(project_id, kind, title, content_md, raw_json, created_at) VALUES (?,?,?,?,?,?)", + (project_id, "transcript", doc_title, content_md, json.dumps(payload, ensure_ascii=False), now_iso()), ) - transcript_id = cur.lastrowid + doc_id = cur.lastrowid - return {"ok": True, "transcript_id": transcript_id, **payload} + return HTMLResponse(f"") -@app.get("/transcripts") -def transcripts(limit: int = 20): +@app.get("/library", response_class=HTMLResponse) +def library(project_id: Optional[int] = None): with db() as c: - rows = c.execute( - "SELECT id, created_at, filename, formatted_text FROM transcripts ORDER BY id DESC LIMIT ?", - (limit,), - ).fetchall() - return {"items": [dict(r) for r in rows]} + projects = c.execute("SELECT id,name FROM projects ORDER BY name").fetchall() + if project_id: + docs = c.execute( + """ + SELECT d.id,d.kind,d.title,d.created_at,p.name AS project + FROM documents d JOIN projects p ON p.id=d.project_id + WHERE d.project_id=? ORDER BY d.id DESC + """, + (project_id,), + ).fetchall() + else: + docs = c.execute( + """ + SELECT d.id,d.kind,d.title,d.created_at,p.name AS project + FROM documents d JOIN projects p ON p.id=d.project_id + ORDER BY d.id DESC LIMIT 200 + """ + ).fetchall() + + p_opts = "" + "".join( + [f"" for p in projects] + ) + items = "".join( + [ + f"
#{d['id']} [{d['kind']}] {d['title']}
{d['project']} · {d['created_at']}
" + f"Ansehen | Download .md
" + for d in docs + ] + ) + body = f""" +

Datenbank / Dokumente

+
+ + + +
+{items or '

Keine Einträge.

'} +""" + return layout("Library", body) -@app.post("/analyze") -def analyze(transcript_id: int = Form(...), prompt: str = Form(...)): +@app.get("/document/{doc_id}", response_class=HTMLResponse) +def view_document(doc_id: int): with db() as c: - row = c.execute("SELECT formatted_text FROM transcripts WHERE id=?", (transcript_id,)).fetchone() - if not row: - raise HTTPException(404, "transcript not found") + d = c.execute( + """ + SELECT d.*, p.name AS project, pr.name AS prompt_name + FROM documents d + JOIN projects p ON p.id=d.project_id + LEFT JOIN prompts pr ON pr.id=d.prompt_id + WHERE d.id=? + """, + (doc_id,), + ).fetchone() + if not d: + raise HTTPException(404, "not found") + + body = f""" +

Dokument #{d['id']} – {d['title']}

+

Projekt: {d['project']} · Typ: {d['kind']} · {d['created_at']}

+

Download .md

+
{(d['content_md'] or '').replace('<','<')}
+""" + return layout("Dokument", body) + + +@app.get("/document/{doc_id}.md", response_class=PlainTextResponse) +def download_md(doc_id: int): + with db() as c: + d = c.execute("SELECT title,content_md FROM documents WHERE id=?", (doc_id,)).fetchone() + if not d: + raise HTTPException(404, "not found") + return PlainTextResponse(d["content_md"], headers={"Content-Disposition": f"attachment; filename=document_{doc_id}.md"}) + + +@app.get("/prompts", response_class=HTMLResponse) +def prompts_page(): + with db() as c: + prompts = c.execute("SELECT * FROM prompts ORDER BY name").fetchall() + projects = c.execute("SELECT id,name FROM projects ORDER BY name").fetchall() + + p_list = "".join( + [ + f"
{p['name']}
{(p['prompt'] or '').replace('<','<')}
" + f"


" + for p in prompts + ] + ) + project_opts = "".join([f"" for p in projects]) + + body = f""" +

Prompt-Konfiguration

+
+
+

Neuer Prompt

+ +
+
+
+
+
+
+

Neues Projekt

+ + {project_opts} + +
+
+{p_list} +""" + return layout("Prompts", body) + + +@app.post("/prompts/add", response_class=HTMLResponse) +def prompt_add(name: str = Form(...), prompt: str = Form(...)): + with db() as c: + c.execute( + "INSERT INTO prompts(name,prompt,created_at,updated_at) VALUES (?,?,?,?)", + (name.strip(), prompt.strip(), now_iso(), now_iso()), + ) + return HTMLResponse("") + + +@app.post("/prompts/update", response_class=HTMLResponse) +def prompt_update(id: int = Form(...), name: str = Form(...), prompt: str = Form(...)): + with db() as c: + c.execute("UPDATE prompts SET name=?, prompt=?, updated_at=? WHERE id=?", (name.strip(), prompt.strip(), now_iso(), id)) + return HTMLResponse("") + + +@app.get("/run", response_class=HTMLResponse) +def run_page(): + with db() as c: + docs = c.execute("SELECT id,title,kind,created_at FROM documents ORDER BY id DESC LIMIT 200").fetchall() + prompts = c.execute("SELECT id,name FROM prompts ORDER BY name").fetchall() + + d_opts = "".join([f"" for d in docs]) + p_opts = "".join([f"" for p in prompts]) + + body = f""" +

Prompt ausführen

+
+
+

+
+

+ +
+""" + return layout("Run", body) + + +@app.post("/run", response_class=HTMLResponse) +def run_prompt(document_id: int = Form(...), prompt_id: int = Form(...)): + with db() as c: + doc = c.execute("SELECT * FROM documents WHERE id=?", (document_id,)).fetchone() + prm = c.execute("SELECT * FROM prompts WHERE id=?", (prompt_id,)).fetchone() + if not doc or not prm: + raise HTTPException(404, "Dokument oder Prompt nicht gefunden") - transcript_text = row[0] llm_prompt = ( - "Du bist ein Meeting-Analyst. Arbeite auf Deutsch.\n" - "Erzeuge präzise Ausgabe für den folgenden Auftrag.\n\n" - f"AUFTRAG:\n{prompt}\n\n" - f"TRANSKRIPT:\n{transcript_text}\n" + "Du bist ein präziser Assistent. Antworte auf Deutsch.\n" + f"AUFTRAG:\n{prm['prompt']}\n\n" + f"TEXT:\n{doc['content_md']}\n" ) - body = { - "model": OLLAMA_MODEL, - "prompt": llm_prompt, - "stream": False, - } - try: - r = requests.post(f"{OLLAMA_BASE_URL}/api/generate", json=body, timeout=600) - except Exception as e: - raise HTTPException(502, f"Ollama unreachable: {e}") - + r = requests.post( + f"{OLLAMA_BASE_URL}/api/generate", + json={"model": OLLAMA_MODEL, "prompt": llm_prompt, "stream": False}, + timeout=1200, + ) if r.status_code >= 400: raise HTTPException(r.status_code, r.text) - - j = r.json() - answer = j.get("response", "") + answer = r.json().get("response", "") with db() as c: cur = c.execute( - "INSERT INTO analyses(transcript_id, created_at, prompt, answer) VALUES (?,?,?,?)", - (transcript_id, datetime.utcnow().isoformat(), prompt, answer), + """ + INSERT INTO documents(project_id, kind, title, content_md, source_document_id, prompt_id, raw_json, created_at) + VALUES (?,?,?,?,?,?,?,?) + """, + ( + doc["project_id"], + "analysis", + f"Analyse: {prm['name']} · {doc['title']}", + answer, + doc["id"], + prm["id"], + json.dumps({"ollama_response": r.json()}, ensure_ascii=False), + now_iso(), + ), ) - analysis_id = cur.lastrowid + new_id = cur.lastrowid - return {"ok": True, "analysis_id": analysis_id, "answer": answer} + return HTMLResponse(f"")