diff --git a/app.py b/app.py index c391eda..4fe9860 100644 --- a/app.py +++ b/app.py @@ -6,14 +6,14 @@ from typing import Optional import requests from fastapi import FastAPI, File, Form, HTTPException, UploadFile -from fastapi.responses import HTMLResponse +from fastapi.responses import HTMLResponse, PlainTextResponse API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/") OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b") DB_PATH = os.getenv("DB_PATH", "/data/ui.db") -app = FastAPI(title="Diarization UI + LLM") +app = FastAPI(title="Diarization UI") def db(): @@ -22,32 +22,114 @@ def db(): return conn +def now_iso() -> str: + return datetime.utcnow().isoformat() + + def init_db(): os.makedirs(os.path.dirname(DB_PATH), exist_ok=True) with db() as c: c.execute( """ - CREATE TABLE IF NOT EXISTS transcripts ( + CREATE TABLE IF NOT EXISTS projects ( id INTEGER PRIMARY KEY AUTOINCREMENT, - created_at TEXT NOT NULL, - filename TEXT, - formatted_text TEXT NOT NULL, - raw_json TEXT NOT NULL + name TEXT UNIQUE NOT NULL, + created_at TEXT NOT NULL ) """ ) c.execute( """ - CREATE TABLE IF NOT EXISTS analyses ( + CREATE TABLE IF NOT EXISTS prompts ( id INTEGER PRIMARY KEY AUTOINCREMENT, - transcript_id INTEGER NOT NULL, - created_at TEXT NOT NULL, + name TEXT UNIQUE NOT NULL, prompt TEXT NOT NULL, - answer TEXT NOT NULL, - FOREIGN KEY(transcript_id) REFERENCES transcripts(id) + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL ) """ ) + c.execute( + """ + CREATE TABLE IF NOT EXISTS documents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + kind TEXT NOT NULL, -- transcript|analysis + title TEXT NOT NULL, + content_md TEXT NOT NULL, + source_document_id INTEGER, + prompt_id INTEGER, + raw_json TEXT, + created_at TEXT NOT NULL, + FOREIGN KEY(project_id) REFERENCES projects(id), + FOREIGN KEY(source_document_id) REFERENCES 
documents(id), + FOREIGN KEY(prompt_id) REFERENCES prompts(id) + ) + """ + ) + + # defaults + c.execute("INSERT OR IGNORE INTO projects(name, created_at) VALUES (?,?)", ("Default", now_iso())) + c.execute( + "INSERT OR IGNORE INTO prompts(name, prompt, created_at, updated_at) VALUES (?,?,?,?)", + ( + "Zusammenfassung", + "Erstelle eine prägnante Zusammenfassung des Gesprächs in Stichpunkten.", + now_iso(), + now_iso(), + ), + ) + c.execute( + "INSERT OR IGNORE INTO prompts(name, prompt, created_at, updated_at) VALUES (?,?,?,?)", + ( + "Aufgaben", + "Extrahiere alle Aufgaben. Gib pro Aufgabe: Verantwortlich, Aufgabe, Deadline (falls vorhanden), Priorität.", + now_iso(), + now_iso(), + ), + ) + + +def layout(title: str, body: str) -> str: + return f""" + +
+Audio wird transkribiert + mit Sprechern angereichert und als Dokument gespeichert.
+{f"{msg}
" if msg else ""} + """ + return layout("Upload", body) -@app.post("/process") -async def process(file: UploadFile = File(...)): +@app.post("/projects", response_class=HTMLResponse) +def add_project(name: str = Form(...)): + with db() as c: + c.execute("INSERT INTO projects(name, created_at) VALUES (?,?)", (name.strip(), now_iso())) + return HTMLResponse("") + + +@app.post("/upload", response_class=HTMLResponse) +async def upload(project_id: int = Form(...), title: str = Form(""), file: UploadFile = File(...)): data = await file.read() if not data: - raise HTTPException(400, "empty file") + raise HTTPException(400, "Leere Datei") files = {"file": (file.filename or "audio.bin", data, file.content_type or "application/octet-stream")} - try: - r = requests.post(f"{API_BASE}/transcribe-diarize", files=files, timeout=1800) - except Exception as e: - raise HTTPException(502, f"API unreachable: {e}") - + r = requests.post(f"{API_BASE}/transcribe-diarize", files=files, timeout=1800) if r.status_code >= 400: raise HTTPException(r.status_code, r.text) payload = r.json() - formatted = payload.get("formatted_text", "") + content_md = payload.get("formatted_text", "") + doc_title = (title or "").strip() or (file.filename or "Transkript") with db() as c: cur = c.execute( - "INSERT INTO transcripts(created_at, filename, formatted_text, raw_json) VALUES (?,?,?,?)", - (datetime.utcnow().isoformat(), file.filename, formatted, json.dumps(payload, ensure_ascii=False)), + "INSERT INTO documents(project_id, kind, title, content_md, raw_json, created_at) VALUES (?,?,?,?,?,?)", + (project_id, "transcript", doc_title, content_md, json.dumps(payload, ensure_ascii=False), now_iso()), ) - transcript_id = cur.lastrowid + doc_id = cur.lastrowid - return {"ok": True, "transcript_id": transcript_id, **payload} + return HTMLResponse(f"") -@app.get("/transcripts") -def transcripts(limit: int = 20): +@app.get("/library", response_class=HTMLResponse) +def library(project_id: Optional[int] = None): 
with db() as c: - rows = c.execute( - "SELECT id, created_at, filename, formatted_text FROM transcripts ORDER BY id DESC LIMIT ?", - (limit,), - ).fetchall() - return {"items": [dict(r) for r in rows]} + projects = c.execute("SELECT id,name FROM projects ORDER BY name").fetchall() + if project_id: + docs = c.execute( + """ + SELECT d.id,d.kind,d.title,d.created_at,p.name AS project + FROM documents d JOIN projects p ON p.id=d.project_id + WHERE d.project_id=? ORDER BY d.id DESC + """, + (project_id,), + ).fetchall() + else: + docs = c.execute( + """ + SELECT d.id,d.kind,d.title,d.created_at,p.name AS project + FROM documents d JOIN projects p ON p.id=d.project_id + ORDER BY d.id DESC LIMIT 200 + """ + ).fetchall() + + p_opts = "" + "".join( + [f"" for p in projects] + ) + items = "".join( + [ + f"" + for d in docs + ] + ) + body = f""" +Keine Einträge.
'} +""" + return layout("Library", body) -@app.post("/analyze") -def analyze(transcript_id: int = Form(...), prompt: str = Form(...)): +@app.get("/document/{doc_id}", response_class=HTMLResponse) +def view_document(doc_id: int): with db() as c: - row = c.execute("SELECT formatted_text FROM transcripts WHERE id=?", (transcript_id,)).fetchone() - if not row: - raise HTTPException(404, "transcript not found") + d = c.execute( + """ + SELECT d.*, p.name AS project, pr.name AS prompt_name + FROM documents d + JOIN projects p ON p.id=d.project_id + LEFT JOIN prompts pr ON pr.id=d.prompt_id + WHERE d.id=? + """, + (doc_id,), + ).fetchone() + if not d: + raise HTTPException(404, "not found") + + body = f""" +Projekt: {d['project']} · Typ: {d['kind']} · {d['created_at']}
+ +{(d['content_md'] or '').replace('<','<')}
+"""
+ return layout("Dokument", body)
+
+
+@app.get("/document/{doc_id}.md", response_class=PlainTextResponse)
+def download_md(doc_id: int):
+    """Return the stored Markdown of one document as a file download.
+
+    Looks up the ``documents`` row whose ``id`` equals ``doc_id`` and sends
+    its ``content_md`` column back as plain text, with a
+    ``Content-Disposition: attachment`` header naming the file
+    ``document_<doc_id>.md``.
+
+    Raises:
+        HTTPException: 404 when no row matches ``doc_id``.
+    """
+    # NOTE(review): "/document/{doc_id}" is registered earlier in this file;
+    # confirm that route does not capture "/document/<id>.md" before this
+    # handler is ever reached.
+    with db() as conn:
+        row = conn.execute(
+            "SELECT title,content_md FROM documents WHERE id=?",
+            (doc_id,),
+        ).fetchone()
+    if not row:
+        raise HTTPException(404, "not found")
+    disposition = f"attachment; filename=document_{doc_id}.md"
+    return PlainTextResponse(
+        row["content_md"],
+        headers={"Content-Disposition": disposition},
+    )
+
+
+@app.get("/prompts", response_class=HTMLResponse)
+def prompts_page():
+ with db() as c:
+ prompts = c.execute("SELECT * FROM prompts ORDER BY name").fetchall()
+ projects = c.execute("SELECT id,name FROM projects ORDER BY name").fetchall()
+
+ p_list = "".join(
+ [
+ f"{(p['prompt'] or '').replace('<','<')}"
+ f"