From 25793c022c7536280d129e7e6e9bc60a0ce3e3ea Mon Sep 17 00:00:00 2001 From: OpenClaw Bot Date: Sat, 21 Mar 2026 13:47:36 +0100 Subject: [PATCH] feat(diarization-ui): add FastAPI UI backend with sqlite storage and Ollama analysis pipeline --- .env.example | 2 + Dockerfile | 16 ++-- README.md | 45 ++++++--- app.py | 211 +++++++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 12 ++- docker-entrypoint.sh | 7 -- index.html.template | 50 ---------- requirements.txt | 4 + 8 files changed, 269 insertions(+), 78 deletions(-) create mode 100644 app.py delete mode 100644 docker-entrypoint.sh delete mode 100644 index.html.template create mode 100644 requirements.txt diff --git a/.env.example b/.env.example index 7d84798..cf68448 100644 --- a/.env.example +++ b/.env.example @@ -1 +1,3 @@ API_BASE=http://gx10.aquantico.lan:8093 +OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434 +OLLAMA_MODEL=qwen3.5:9b diff --git a/Dockerfile b/Dockerfile index ba46cf3..4af6e5b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,12 @@ -FROM docker.io/library/nginx:alpine +FROM docker.io/library/python:3.12-slim -COPY index.html.template /usr/share/nginx/html/index.html.template -COPY docker-entrypoint.sh /docker-entrypoint.sh -RUN chmod +x /docker-entrypoint.sh +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 -EXPOSE 80 -CMD ["/docker-entrypoint.sh"] +WORKDIR /app +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt +COPY app.py /app/app.py + +EXPOSE 8094 +CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8094"] diff --git a/README.md b/README.md index 837f4df..28db353 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,39 @@ # diarization-ui -Separate UI container for the diarization/transcription API. - -## Run - -```bash -docker compose up -d --build -``` - -UI will be available on `http://127.0.0.1:8094/`. - -By default it calls API at `http://diarization-api:8093`. -Set `API_BASE` in `.env` if needed. 
+Eigenes UI-Projekt (separates Repo/Container) für: +- Upload Audio +- Aufruf von `transcribe-diarize` API +- Speichern in SQLite +- LLM-Auswertung via Ollama (Qwen) ## .env ```env -API_BASE=http://diarization-api:8093 +API_BASE=http://gx10.aquantico.lan:8093 +OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434 +OLLAMA_MODEL=qwen3.5:9b ``` + +## Start (Docker/Compose) + +```bash +cp .env.example .env +docker compose up -d --build +``` + +UI: `http://127.0.0.1:8094/` + +## Podman (einzelner Container) + +```bash +podman build -t localhost/diarization-ui:latest . +podman rm -f diarization-ui || true +podman run -d --name diarization-ui -p 18094:8094 \ + -e API_BASE=http://gx10.aquantico.lan:8093 \ + -e OLLAMA_BASE_URL=http://gx10.aquantico.lan:11434 \ + -e OLLAMA_MODEL=qwen3.5:9b \ + -v diarization_ui_data:/data \ + localhost/diarization-ui:latest +``` + +UI dann: `http://127.0.0.1:18094/` diff --git a/app.py b/app.py new file mode 100644 index 0000000..c391eda --- /dev/null +++ b/app.py @@ -0,0 +1,211 @@
+import json
+import os
+import sqlite3
+from datetime import datetime
+from typing import Optional
+
+import requests
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile
+from fastapi.responses import HTMLResponse
+
+# Runtime configuration, read once from the environment at import time.
+# Trailing slashes are stripped so URLs can be built as f"{BASE}/path".
+API_BASE = os.getenv("API_BASE", "http://gx10.aquantico.lan:8093").rstrip("/")
+OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://gx10.aquantico.lan:11434").rstrip("/")
+OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")
+DB_PATH = os.getenv("DB_PATH", "/data/ui.db")
+
+app = FastAPI(title="Diarization UI + LLM")
+
+
+class _ClosingConnection(sqlite3.Connection):
+    """SQLite connection that is also closed when its ``with`` block exits.
+
+    The stock ``sqlite3.Connection`` context manager only commits or rolls
+    back the transaction and leaves the connection open, so every
+    ``with db() as c:`` would otherwise leak one connection.
+    """
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        try:
+            # Keep the standard behavior: commit on success, roll back on error.
+            return super().__exit__(exc_type, exc_value, traceback)
+        finally:
+            self.close()
+
+
+def db():
+    """Open a new connection to the SQLite database at ``DB_PATH``.
+
+    Returns:
+        A ``sqlite3.Connection`` (subclass) with ``row_factory`` set to
+        ``sqlite3.Row``. Used as ``with db() as c:`` it commits or rolls
+        back and then closes the connection.
+    """
+    conn = sqlite3.connect(DB_PATH, factory=_ClosingConnection)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+def init_db():
+    """Create the database directory and the two tables if they are missing.
+
+    ``transcripts`` stores one row per processed upload; ``analyses`` stores
+    one row per LLM answer and references ``transcripts(id)``.
+    """
+    # A bare file name such as "ui.db" has an empty dirname, and
+    # os.makedirs("") raises FileNotFoundError, so only create real parents.
+    db_dir = os.path.dirname(DB_PATH)
+    if db_dir:
+        os.makedirs(db_dir, exist_ok=True)
+    with db() as c:
+        c.execute(
+            """
+            CREATE TABLE IF NOT EXISTS transcripts (
+              id INTEGER PRIMARY KEY AUTOINCREMENT,
+              created_at TEXT NOT NULL,
+              filename TEXT,
+              formatted_text TEXT NOT NULL,
+              raw_json TEXT NOT NULL
+            )
+            """
+        )
+        c.execute(
+            """
+            CREATE TABLE IF NOT EXISTS analyses (
+              id INTEGER PRIMARY KEY AUTOINCREMENT,
+              transcript_id INTEGER NOT NULL,
+              created_at TEXT NOT NULL,
+              prompt TEXT NOT NULL,
+              answer TEXT NOT NULL,
+              FOREIGN KEY(transcript_id) REFERENCES transcripts(id)
+            )
+            """
+        )
+
+
+@app.on_event("startup")
+def startup():
+    """Initialize the database schema when the application starts."""
+    init_db()
+
+
+@app.get("/healthz")
+def healthz():
+    """Return a static OK flag together with the effective configuration."""
+    return {
+        "ok": True,
+        "api_base": API_BASE,
+        "ollama_base_url": OLLAMA_BASE_URL,
+        "ollama_model": OLLAMA_MODEL,
+        "db_path": DB_PATH,
+    }
+
+
+@app.get("/", response_class=HTMLResponse) +def index(): + return """ + + +Diarization UI + + + +

Upload -> Transcribe + Diarize -> speichern -> LLM Analyse

+
+ + +
+

+

+
+

Analyse

+
+ + + +
+

+
+

Gespeicherte Transkripte

+ +
+ + + +"""
+
+
+def _utc_now_iso():
+    """Return the current UTC time as a naive ISO-8601 string.
+
+    Produces the same text format as ``datetime.utcnow().isoformat()``
+    without calling ``utcnow()``, which is deprecated since Python 3.12.
+    """
+    from datetime import timezone  # local import: the file header only imports `datetime`
+
+    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
+
+
+def _store_transcript(filename, formatted, payload):
+    """Insert one transcript row and return its new primary key."""
+    with db() as c:
+        cur = c.execute(
+            "INSERT INTO transcripts(created_at, filename, formatted_text, raw_json) VALUES (?,?,?,?)",
+            (_utc_now_iso(), filename, formatted, json.dumps(payload, ensure_ascii=False)),
+        )
+        return cur.lastrowid
+
+
+@app.post("/process")
+async def process(file: UploadFile = File(...)):
+    """Forward an uploaded audio file to the transcription API and store the result.
+
+    Args:
+        file: The uploaded file; its bytes are sent as the ``file`` form part
+            to ``{API_BASE}/transcribe-diarize``.
+
+    Returns:
+        ``{"ok": True, "transcript_id": <new row id>}`` merged with the
+        JSON object returned by the API.
+
+    Raises:
+        HTTPException: 400 for an empty upload, 502 when the API cannot be
+            reached or returns a body that is not a JSON object, or the
+            API's own status code when it answers with an error (>= 400).
+    """
+    import asyncio  # local import: only this coroutine needs it
+
+    data = await file.read()
+    if not data:
+        raise HTTPException(400, "empty file")
+
+    files = {"file": (file.filename or "audio.bin", data, file.content_type or "application/octet-stream")}
+    try:
+        # requests is blocking and the timeout is 30 minutes; run it in a
+        # worker thread so the event loop keeps serving other requests.
+        r = await asyncio.to_thread(
+            requests.post, f"{API_BASE}/transcribe-diarize", files=files, timeout=1800
+        )
+    except requests.RequestException as e:
+        raise HTTPException(502, f"API unreachable: {e}") from e
+
+    if r.status_code >= 400:
+        raise HTTPException(r.status_code, r.text)
+
+    try:
+        payload = r.json()
+    except ValueError as e:
+        raise HTTPException(502, "API returned invalid JSON") from e
+    if not isinstance(payload, dict):
+        raise HTTPException(502, "API returned unexpected JSON payload")
+
+    # A JSON null would violate the NOT NULL column, so fall back to "".
+    formatted = payload.get("formatted_text") or ""
+
+    # The SQLite write is blocking as well; keep it off the event loop.
+    transcript_id = await asyncio.to_thread(_store_transcript, file.filename, formatted, payload)
+
+    return {"ok": True, "transcript_id": transcript_id, **payload}
+
+
+@app.get("/transcripts")
+def transcripts(limit: int = 20):
+    """Return the most recent transcripts, newest first.
+
+    Args:
+        limit: Maximum number of rows to return. Negative values are
+            treated as 0.
+
+    Returns:
+        ``{"items": [...]}`` where each item has ``id``, ``created_at``,
+        ``filename`` and ``formatted_text``.
+    """
+    # SQLite treats a negative LIMIT as "no limit"; clamp it so that
+    # ?limit=-1 cannot return the whole table.
+    limit = max(limit, 0)
+    with db() as c:
+        rows = c.execute(
+            "SELECT id, created_at, filename, formatted_text FROM transcripts ORDER BY id DESC LIMIT ?",
+            (limit,),
+        ).fetchall()
+    return {"items": [dict(r) for r in rows]}
+
+
+@app.post("/analyze")
+def analyze(transcript_id: int = Form(...), prompt: str = Form(...)):
+    """Run an LLM analysis over a stored transcript and save the answer.
+
+    Args:
+        transcript_id: Primary key of the row in ``transcripts``.
+        prompt: The user's task; it is embedded into the LLM prompt together
+            with the transcript text.
+
+    Returns:
+        ``{"ok": True, "analysis_id": <new row id>, "answer": <LLM text>}``.
+
+    Raises:
+        HTTPException: 404 when the transcript does not exist, 502 when
+            Ollama cannot be reached or returns a body that is not a JSON
+            object, or Ollama's own status code when it answers with an
+            error (>= 400).
+    """
+    with db() as c:
+        row = c.execute("SELECT formatted_text FROM transcripts WHERE id=?", (transcript_id,)).fetchone()
+    if not row:
+        raise HTTPException(404, "transcript not found")
+
+    transcript_text = row[0]
+    llm_prompt = (
+        "Du bist ein Meeting-Analyst. Arbeite auf Deutsch.\n"
+        "Erzeuge präzise Ausgabe für den folgenden Auftrag.\n\n"
+        f"AUFTRAG:\n{prompt}\n\n"
+        f"TRANSKRIPT:\n{transcript_text}\n"
+    )
+
+    body = {
+        "model": OLLAMA_MODEL,
+        "prompt": llm_prompt,
+        "stream": False,
+    }
+    try:
+        r = requests.post(f"{OLLAMA_BASE_URL}/api/generate", json=body, timeout=600)
+    except requests.RequestException as e:
+        raise HTTPException(502, f"Ollama unreachable: {e}") from e
+
+    if r.status_code >= 400:
+        raise HTTPException(r.status_code, r.text)
+
+    try:
+        j = r.json()
+    except ValueError as e:
+        raise HTTPException(502, "Ollama returned invalid JSON") from e
+    if not isinstance(j, dict):
+        raise HTTPException(502, "Ollama returned unexpected JSON payload")
+
+    # A JSON null would violate the NOT NULL column, so fall back to "".
+    answer = j.get("response") or ""
+
+    with db() as c:
+        cur = c.execute(
+            "INSERT INTO analyses(transcript_id, created_at, prompt, answer) VALUES (?,?,?,?)",
+            (transcript_id, _utc_now_iso(), prompt, answer),
+        )
+        analysis_id = cur.lastrowid
+
+    return {"ok": True, "analysis_id": analysis_id, "answer": answer}
diff --git a/docker-compose.yml b/docker-compose.yml index f74c26e..ec550bf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,11 +1,19 @@ services: diarization-ui: build: - context: ./web-ui + context: .
dockerfile: Dockerfile container_name: diarization-ui restart: unless-stopped ports: - - "8094:80" + - "8094:8094" environment: - API_BASE=${API_BASE:-http://gx10.aquantico.lan:8093} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://gx10.aquantico.lan:11434} + - OLLAMA_MODEL=${OLLAMA_MODEL:-qwen3.5:9b} + - DB_PATH=/data/ui.db + volumes: + - diarization_ui_data:/data + +volumes: + diarization_ui_data: diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh deleted file mode 100644 index b08bfa8..0000000 --- a/docker-entrypoint.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env sh -set -eu - -API_BASE="${API_BASE:-http://diarization-api:8093}" -sed "s|__API_BASE__|${API_BASE}|g" /usr/share/nginx/html/index.html.template > /usr/share/nginx/html/index.html - -exec nginx -g 'daemon off;' diff --git a/index.html.template b/index.html.template deleted file mode 100644 index 9dba953..0000000 --- a/index.html.template +++ /dev/null @@ -1,50 +0,0 @@ - - - - - - Diarization + Whisper UI - - - -

Whisper + Sprechertrennung

-

API:

-
- - -
-

-
-  
-
-
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2d5ec1e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+fastapi==0.115.6
+uvicorn[standard]==0.32.1
+requests==2.32.3
+python-multipart==0.0.12