Files
diarization-ui/app.py

212 lines
6.9 KiB
Python
Raw Normal View History

import json
import os
import sqlite3
from datetime import datetime
from typing import Optional
import requests
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import HTMLResponse
# Base URL of the transcription/diarization backend, taken from the API_BASE
# environment variable; trailing "/" is stripped so paths can be appended.
API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
# Base URL of the Ollama server (OLLAMA_BASE_URL env var), trailing "/" stripped.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
# Model name sent to Ollama's /api/generate endpoint (OLLAMA_MODEL env var).
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
# Path of the SQLite database file (DB_PATH env var).
DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
# FastAPI application on which the routes in this module are registered.
app = FastAPI(title="Diarization UI + LLM")
class _ClosingConnection(sqlite3.Connection):
    """SQLite connection that closes itself when its ``with`` block ends.

    The stock ``sqlite3.Connection`` context manager only commits or rolls
    back the transaction and leaves the handle open, so ``with db() as c:``
    would otherwise leak one connection per call until garbage collection.
    """

    def __exit__(self, exc_type, exc_value, traceback):
        try:
            # Commit on success / roll back on error, as the base class does.
            return super().__exit__(exc_type, exc_value, traceback)
        finally:
            self.close()


def db():
    """Open a new connection to the SQLite database at ``DB_PATH``.

    Returns:
        A ``sqlite3.Connection`` whose rows are ``sqlite3.Row`` objects
        (addressable by column name). When used as a context manager the
        transaction is committed or rolled back and the connection is then
        closed.
    """
    conn = sqlite3.connect(DB_PATH, factory=_ClosingConnection)
    conn.row_factory = sqlite3.Row
    return conn
def init_db():
    """Create the database directory and schema if they do not exist yet.

    Ensures the parent directory of ``DB_PATH`` exists, then creates the
    ``transcripts`` and ``analyses`` tables with ``CREATE TABLE IF NOT
    EXISTS``, so calling this repeatedly is harmless.
    """
    # A bare filename such as "ui.db" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError; only create real directories.
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    with db() as c:
        c.execute(
            """
            CREATE TABLE IF NOT EXISTS transcripts (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                created_at TEXT NOT NULL,
                filename TEXT,
                formatted_text TEXT NOT NULL,
                raw_json TEXT NOT NULL
            )
            """
        )
        c.execute(
            """
            CREATE TABLE IF NOT EXISTS analyses (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                transcript_id INTEGER NOT NULL,
                created_at TEXT NOT NULL,
                prompt TEXT NOT NULL,
                answer TEXT NOT NULL,
                FOREIGN KEY(transcript_id) REFERENCES transcripts(id)
            )
            """
        )
@app.on_event("startup")
def startup() -> None:
    """Run the schema initialisation when the application starts."""
    init_db()
@app.get("/healthz")
def healthz():
    """Report liveness together with the configuration currently in effect."""
    status = {"ok": True}
    # Echo the module-level settings so a deployment can be checked remotely.
    status.update(
        {
            "api_base": API_BASE,
            "ollama_base_url": OLLAMA_BASE_URL,
            "ollama_model": OLLAMA_MODEL,
            "db_path": DB_PATH,
        }
    )
    return status
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the single-page UI (upload, analysis form, transcript list).

    Returns:
        A static HTML document whose inline script talks to ``/process``,
        ``/analyze`` and ``/transcripts``.
    """
    # The transcript list is built with DOM nodes and textContent instead of
    # innerHTML: the filename and transcript text come from uploads and the
    # backend, so inserting them as markup would be a stored XSS.
    return """
<!doctype html>
<html><head><meta charset='utf-8'><meta name='viewport' content='width=device-width, initial-scale=1'>
<title>Diarization UI</title>
<style>body{font-family:Arial;max-width:1100px;margin:24px auto;padding:0 12px}.row{display:flex;gap:8px;flex-wrap:wrap}button{padding:8px 12px}pre{white-space:pre-wrap;background:#111;color:#0f0;padding:10px;border-radius:8px;min-height:140px}.card{border:1px solid #ddd;border-radius:8px;padding:10px;margin:10px 0}</style>
</head>
<body>
<h2>Upload -> Transcribe + Diarize -> speichern -> LLM Analyse</h2>
<div class='row'>
<input id='f' type='file' accept='audio/*'>
<button onclick='processFile()'>Verarbeiten</button>
</div>
<p id='status'></p>
<pre id='out'></pre>
<h3>Analyse</h3>
<div class='row'>
<input id='tid' type='number' placeholder='transcript_id'>
<input id='prompt' style='width:500px' placeholder='z.B. Fasse zusammen und extrahiere Aufgaben mit Verantwortlichen.'>
<button onclick='analyze()'>Mit Qwen analysieren</button>
</div>
<pre id='analysis'></pre>
<h3>Gespeicherte Transkripte</h3>
<button onclick='loadTranscripts()'>Neu laden</button>
<div id='list'></div>
<script>
async function processFile(){
const fi=document.getElementById('f');
if(!fi.files.length){alert('Datei wählen');return;}
const fd=new FormData(); fd.append('file',fi.files[0]);
document.getElementById('status').textContent='Läuft...';
const r=await fetch('/process',{method:'POST',body:fd});
const j=await r.json();
document.getElementById('status').textContent = r.ok ? `OK transcript_id=${j.transcript_id}` : `Fehler ${r.status}`;
document.getElementById('out').textContent = JSON.stringify(j,null,2);
if(j.transcript_id){document.getElementById('tid').value=j.transcript_id;}
loadTranscripts();
}
async function analyze(){
const transcript_id=parseInt(document.getElementById('tid').value||'0');
const prompt=document.getElementById('prompt').value;
const r=await fetch('/analyze',{method:'POST',headers:{'Content-Type':'application/x-www-form-urlencoded'},body:new URLSearchParams({transcript_id,prompt})});
const j=await r.json();
document.getElementById('analysis').textContent = JSON.stringify(j,null,2);
}
async function loadTranscripts(){
const r=await fetch('/transcripts');
const j=await r.json();
const root=document.getElementById('list');
root.innerHTML='';
for(const t of (j.items||[])){
const d=document.createElement('div'); d.className='card';
// Untrusted values go in as text nodes, never as HTML.
const b=document.createElement('b'); b.textContent=`#${t.id}`;
const pre=document.createElement('pre'); pre.textContent=(t.formatted_text||'').slice(0,1200);
d.append(b, ` ${t.created_at} ${t.filename||''}`, document.createElement('br'), pre);
root.appendChild(d);
}
}
loadTranscripts();
</script>
</body></html>
"""
@app.post("/process")
async def process(file: UploadFile = File(...)):
    """Transcribe and diarize an uploaded file and store the result.

    The upload is forwarded to ``{API_BASE}/transcribe-diarize``. The JSON
    reply is saved in the ``transcripts`` table and returned to the caller
    together with the id of the new row.

    Args:
        file: Upload from the multipart form field ``file``.

    Returns:
        The backend payload extended with ``ok`` and ``transcript_id``.

    Raises:
        HTTPException: 400 for an empty upload; 502 when the backend cannot
            be reached or does not answer with a JSON object; the backend's
            own status code when it answers with an error (>= 400).
    """
    # Local import: keeps this handler self-contained without touching the
    # module's import block.
    from fastapi.concurrency import run_in_threadpool

    data = await file.read()
    if not data:
        raise HTTPException(400, "empty file")

    files = {"file": (file.filename or "audio.bin", data, file.content_type or "application/octet-stream")}
    try:
        # requests is blocking and this call may take up to 30 minutes; run it
        # in a worker thread so the event loop keeps serving other requests.
        r = await run_in_threadpool(
            requests.post,
            f"{API_BASE}/transcribe-diarize",
            files=files,
            timeout=1800,
        )
    except requests.RequestException as e:
        raise HTTPException(502, f"API unreachable: {e}") from e
    if r.status_code >= 400:
        raise HTTPException(r.status_code, r.text)

    try:
        payload = r.json()
    except ValueError as e:
        raise HTTPException(502, "API returned invalid JSON") from e
    if not isinstance(payload, dict):
        raise HTTPException(502, "API returned unexpected JSON")

    # The column is NOT NULL, so a missing or null value is stored as "".
    formatted = payload.get("formatted_text") or ""
    with db() as c:
        cur = c.execute(
            "INSERT INTO transcripts(created_at, filename, formatted_text, raw_json) VALUES (?,?,?,?)",
            (datetime.utcnow().isoformat(), file.filename, formatted, json.dumps(payload, ensure_ascii=False)),
        )
        transcript_id = cur.lastrowid

    result = {"ok": True, "transcript_id": transcript_id, **payload}
    # A "transcript_id" key in the backend payload must not replace the id of
    # the row that was just stored.
    result["transcript_id"] = transcript_id
    return result
@app.get("/transcripts")
def transcripts(limit: int = 20):
    """List stored transcripts, newest (highest id) first.

    Args:
        limit: Value bound to the SQL ``LIMIT`` clause.

    Returns:
        ``{"items": [...]}`` where each item is a dict with ``id``,
        ``created_at``, ``filename`` and ``formatted_text``.
    """
    query = "SELECT id, created_at, filename, formatted_text FROM transcripts ORDER BY id DESC LIMIT ?"
    with db() as conn:
        cursor = conn.execute(query, (limit,))
        records = cursor.fetchall()
    # sqlite3.Row objects are turned into plain dicts for the JSON response.
    return {"items": list(map(dict, records))}
@app.post("/analyze")
def analyze(transcript_id: int = Form(...), prompt: str = Form(...)):
    """Run an LLM analysis over a stored transcript and save the answer.

    Loads the transcript text, sends it with the user's instruction to
    ``{OLLAMA_BASE_URL}/api/generate`` (non-streaming) and stores the reply
    in the ``analyses`` table.

    Args:
        transcript_id: Id of the row in ``transcripts`` to analyse.
        prompt: Instruction inserted into the LLM prompt.

    Returns:
        ``{"ok": True, "analysis_id": <new row id>, "answer": <LLM text>}``.

    Raises:
        HTTPException: 404 when the transcript does not exist; 502 when
            Ollama cannot be reached or does not answer with a JSON object;
            Ollama's own status code when it answers with an error (>= 400).
    """
    with db() as c:
        row = c.execute("SELECT formatted_text FROM transcripts WHERE id=?", (transcript_id,)).fetchone()
    if not row:
        raise HTTPException(404, "transcript not found")
    transcript_text = row[0]

    llm_prompt = (
        "Du bist ein Meeting-Analyst. Arbeite auf Deutsch.\n"
        "Erzeuge präzise Ausgabe für den folgenden Auftrag.\n\n"
        f"AUFTRAG:\n{prompt}\n\n"
        f"TRANSKRIPT:\n{transcript_text}\n"
    )
    body = {
        "model": OLLAMA_MODEL,
        "prompt": llm_prompt,
        "stream": False,
    }
    try:
        r = requests.post(f"{OLLAMA_BASE_URL}/api/generate", json=body, timeout=600)
    except requests.RequestException as e:
        raise HTTPException(502, f"Ollama unreachable: {e}") from e
    if r.status_code >= 400:
        raise HTTPException(r.status_code, r.text)

    try:
        j = r.json()
    except ValueError as e:
        raise HTTPException(502, "Ollama returned invalid JSON") from e
    if not isinstance(j, dict):
        raise HTTPException(502, "Ollama returned unexpected JSON")

    # The column is NOT NULL, so a missing or null response is stored as "".
    answer = j.get("response") or ""
    with db() as c:
        cur = c.execute(
            "INSERT INTO analyses(transcript_id, created_at, prompt, answer) VALUES (?,?,?,?)",
            (transcript_id, datetime.utcnow().isoformat(), prompt, answer),
        )
        analysis_id = cur.lastrowid
    return {"ok": True, "analysis_id": analysis_id, "answer": answer}