Refactor storage to provider-agnostic band-scoped model

Replaces per-member Nextcloud credentials with a BandStorage model that
supports multiple providers. Credentials are Fernet-encrypted at rest;
worker receives audio via an internal streaming endpoint instead of
direct storage access.

- Add BandStorage DB model with partial unique index (one active per band)
- Add migrations 0007 (create band_storage) and 0008 (drop old nc columns)
- Add StorageFactory that builds the correct StorageClient from BandStorage
- Add storage router: connect/nextcloud, OAuth2 authorize/callback, list, disconnect
- Add Fernet encryption helpers in security/encryption.py
- Rewrite watcher for per-band polling via internal API config endpoint
- Update worker to stream audio from API instead of accessing storage directly
- Update frontend: new storage API in bands.ts, rewritten StorageSection,
  simplified band creation modal (no storage step)
- Add STORAGE_ENCRYPTION_KEY to all docker-compose files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mistral Vibe
2026-04-10 23:22:36 +02:00
parent ba22853bc7
commit b2d6b4d113
44 changed files with 1725 additions and 675 deletions

View File

@@ -5,11 +5,10 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
class WatcherSettings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
nextcloud_url: str = "http://nextcloud"
nextcloud_user: str = "ncadmin"
nextcloud_pass: str = ""
api_url: str = "http://api:8000"
# Shared secret for calling internal API endpoints
internal_secret: str = "dev-change-me-in-production"
redis_url: str = "redis://localhost:6379/0"
job_queue_key: str = "rh:jobs"
@@ -18,6 +17,10 @@ class WatcherSettings(BaseSettings):
# File extensions to watch
audio_extensions: list[str] = [".wav", ".mp3", ".flac", ".aac", ".ogg", ".m4a", ".opus"]
# How often (in poll cycles) to refresh the list of bands from the API.
# 0 = only on startup, N = every N poll cycles.
config_refresh_interval: int = 10
@lru_cache
def get_settings() -> WatcherSettings:

View File

@@ -1,149 +1,93 @@
"""Event loop: poll Nextcloud activity, detect audio uploads, push to API."""
"""Event loop: fetch per-band storage configs from the API, detect audio uploads."""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any
import httpx
from watcher.config import WatcherSettings
from watcher.nc_client import NextcloudWatcherClient
from watcher.nc_watcher import NextcloudWatcher
from watcher.protocol import FileEvent, WatcherClient
log = logging.getLogger("watcher.event_loop")
# Persist last seen activity ID in-process (good enough for a POC)
_last_activity_id: int = 0
# Nextcloud Activity API v2 filter sets.
#
# NC 22+ returns: type="file_created"|"file_changed" (subject is human-readable)
# NC <22 returns: type="files" (subject is a machine key like "created_self")
#
# We accept either style so the watcher works across NC versions.
_UPLOAD_TYPES = {"file_created", "file_changed"}
_UPLOAD_SUBJECTS = {
"created_by",
"changed_by",
"created_public",
"created_self",
"changed_self",
}
def is_audio_file(path: str, extensions: list[str]) -> bool:
    """Report whether ``path`` carries one of the watched audio suffixes.

    The suffix of ``path`` is lower-cased before the lookup; the entries of
    ``extensions`` are compared exactly as given (leading dot included).
    """
    suffix = Path(path).suffix
    return suffix.lower() in extensions
def normalize_nc_path(raw_path: str, username: str) -> str:
    """Reduce a Nextcloud activity/WebDAV path to a plain user-relative path.

    Accepted input shapes (leading/trailing slashes are ignored):
        /remote.php/dav/files/<username>/bands/slug/...
        /<username>/files/bands/slug/...
        files/bands/slug/...
        bands/slug/...            (already relative, returned unchanged)

    Only the first matching prefix is removed.
    """
    trimmed = raw_path.strip("/")
    # Checked in this order; the first hit wins.
    known_prefixes = (
        f"remote.php/dav/files/{username}/",  # WebDAV URL form
        f"{username}/files/",  # activity app form
        "files/",  # bare "files/" root
    )
    for prefix in known_prefixes:
        if trimmed.startswith(prefix):
            return trimmed[len(prefix):]
    return trimmed
def extract_nc_file_path(activity: dict[str, Any]) -> str | None:
    """Pull the file path out of a Nextcloud activity payload.

    The first string value found in the ``objects`` mapping wins. When
    ``objects`` is not a mapping, or holds no string value, the
    ``object_name`` field is used instead (older NC versions put the path
    there). Returns ``None`` when neither source yields a non-empty value.
    """
    candidates = activity.get("objects", {})
    if isinstance(candidates, dict):
        first_path = next(
            (value for value in candidates.values() if isinstance(value, str)),
            None,
        )
        if first_path is not None:
            return first_path
    return activity.get("object_name") or None
# NOTE(review): this span is diff residue. The next line looks like the header
# of the removed register_version_with_api(); the added function starts on the
# line after it. Confirm against the real file before editing.
async def register_version_with_api(nc_file_path: str, nc_file_etag: str | None, api_url: str) -> bool:
async def fetch_nextcloud_configs(settings: WatcherSettings) -> list[dict]:
"""Fetch active Nextcloud configs for all bands from the internal API."""
# Internal endpoint on the API service, authenticated via the shared secret header.
url = f"{settings.api_url}/api/v1/internal/storage/nextcloud-watch-configs"
headers = {"X-Internal-Token": settings.internal_secret}
try:
# NOTE(review): the payload line presumably belongs to the removed function.
payload = {"nc_file_path": nc_file_path, "nc_file_etag": nc_file_etag}
async with httpx.AsyncClient(timeout=15.0) as c:
# NOTE(review): the POST is presumably the removed request; the GET below replaces it.
resp = await c.post(f"{api_url}/api/v1/internal/nc-upload", json=payload)
resp = await c.get(url, headers=headers)
# Error statuses raise here and are handled by the except clause below.
resp.raise_for_status()
# The decoded JSON body is returned as-is; its shape is not validated here.
return resp.json()
except Exception as exc:
# Best-effort: any failure is logged and reported to the caller as "no configs".
log.warning("Failed to fetch NC configs from API: %s", exc)
return []
def build_nc_watchers(
    configs: list[dict],
    settings: WatcherSettings,
) -> dict[str, NextcloudWatcher]:
    """Build one NextcloudWatcher per band from the API config payload.

    Args:
        configs: Entries from the internal watch-config endpoint. Each entry
            is expected to carry ``band_id``, ``nc_url``, ``nc_username``
            and ``nc_app_password``.
        settings: Watcher settings; only ``audio_extensions`` is read.

    Returns:
        Mapping of band id to its watcher. Entries that cannot be turned
        into a watcher are logged and left out, so one malformed entry
        never prevents the remaining bands from being watched.
    """
    watchers: dict[str, NextcloudWatcher] = {}
    for cfg in configs:
        # Resolve the id defensively: a missing key used to raise outside the
        # try block and abort the build for every band.
        band_id = cfg.get("band_id") if isinstance(cfg, dict) else None
        if band_id is None:
            # The entry itself is deliberately not logged: it may hold credentials.
            log.error("Skipping storage config entry without a band_id")
            continue
        try:
            watchers[band_id] = NextcloudWatcher(
                band_id=band_id,
                nc_url=cfg["nc_url"],
                nc_username=cfg["nc_username"],
                nc_app_password=cfg["nc_app_password"],
                audio_extensions=settings.audio_extensions,
            )
        except Exception as exc:
            log.error("Failed to create watcher for band %s: %s", band_id, exc)
    return watchers
# NOTE(review): this span is diff residue. Log lines that reference
# ``nc_file_path`` presumably belong to the removed register_version_with_api();
# the ``event.file_path`` variants right after them are their replacements.
async def register_event_with_api(event: FileEvent, settings: WatcherSettings) -> bool:
"""Forward a FileEvent to the API's internal nc-upload endpoint."""
# Only the path and etag of the event are sent; band_id and event_type are not part of the payload.
payload = {"nc_file_path": event.file_path, "nc_file_etag": event.etag}
headers = {"X-Internal-Token": settings.internal_secret}
try:
async with httpx.AsyncClient(timeout=15.0) as c:
resp = await c.post(
f"{settings.api_url}/api/v1/internal/nc-upload",
json=payload,
headers=headers,
)
# 200 and 201 are the only statuses treated as success.
if resp.status_code in (200, 201):
log.info("Registered version via internal API: %s", nc_file_path)
log.info("Registered event via internal API: %s", event.file_path)
return True
# Any other status is logged with the first 200 characters of the body and reported as failure.
log.warning(
"Internal API returned %d for %s: %s",
resp.status_code, nc_file_path, resp.text[:200],
resp.status_code, event.file_path, resp.text[:200],
)
return False
except Exception as exc:
# Best-effort: failures are logged and reported as False, never raised.
log.warning("Failed to register version with API for %s: %s", nc_file_path, exc)
log.warning("Failed to register event with API for %s: %s", event.file_path, exc)
return False
async def poll_once(nc_client: NextcloudWatcherClient, settings: WatcherSettings) -> None:
    """Run one poll cycle against a single Nextcloud client.

    Fetches the activities newer than the module-level cursor, advances the
    cursor for every activity seen (acted on or not), and registers each
    audio upload with the internal API.
    """
    global _last_activity_id

    batch = await nc_client.get_activities(since_id=_last_activity_id)
    if not batch:
        log.info("No new activities since id=%d", _last_activity_id)
        return

    log.info("Received %d activities (since id=%d)", len(batch), _last_activity_id)

    for entry in batch:
        entry_id = int(entry.get("activity_id", 0))
        entry_type = entry.get("type", "")
        entry_subject = entry.get("subject", "")
        source_path = extract_nc_file_path(entry)

        # The cursor moves forward even for activities that end up skipped.
        if _last_activity_id < entry_id:
            _last_activity_id = entry_id

        log.info(
            "Activity id=%d type=%r subject=%r raw_path=%r",
            entry_id, entry_type, entry_subject, source_path,
        )

        if source_path is None:
            log.info(" → skip: no file path in activity payload")
            continue

        relative_path = normalize_nc_path(source_path, nc_client.username)
        log.info(" → normalized path: %r", relative_path)

        # Non-audio files are dropped before the event type is even looked at.
        if not is_audio_file(relative_path, settings.audio_extensions):
            log.info(
                " → skip: not an audio file (ext=%s)",
                Path(relative_path).suffix.lower() or "<none>",
            )
            continue

        # Either the type (newer NC) or the subject key (older NC) may mark an upload.
        looks_like_upload = entry_type in _UPLOAD_TYPES or entry_subject in _UPLOAD_SUBJECTS
        if not looks_like_upload:
            log.info(
                " → skip: type=%r subject=%r is not a file upload event",
                entry_type, entry_subject,
            )
            continue

        log.info(" → MATCH — registering audio upload: %s", relative_path)
        file_etag = await nc_client.get_file_etag(relative_path)
        registered = await register_version_with_api(relative_path, file_etag, settings.api_url)
        if not registered:
            log.warning(" → FAILED to register upload for activity %d (%s)", entry_id, relative_path)
async def poll_all_once(
    watchers: dict[str, WatcherClient],
    cursors: dict[str, str | None],
    settings: WatcherSettings,
) -> None:
    """Run one poll cycle over every band watcher.

    For each band the stored cursor is handed to the watcher, the returned
    cursor is written back into ``cursors``, and every reported event is
    forwarded to the API. A failure for one band is logged and does not stop
    the remaining bands from being polled.
    """
    for band_id, client in watchers.items():
        previous = cursors.get(band_id)
        try:
            found, latest = await client.poll_changes(previous)
            # The cursor is stored before the events are forwarded.
            cursors[band_id] = latest
            if found:
                log.info("Band %s: %d new event(s)", band_id, len(found))
                for file_event in found:
                    await register_event_with_api(file_event, settings)
            else:
                log.debug("Band %s: no new events (cursor=%s)", band_id, latest)
        except Exception as err:
            log.exception("Poll error for band %s: %s", band_id, err)

View File

@@ -6,11 +6,13 @@ import asyncio
import logging
from watcher.config import get_settings
from watcher.event_loop import poll_once
from watcher.nc_client import NextcloudWatcherClient
from watcher.event_loop import (
build_nc_watchers,
fetch_nextcloud_configs,
poll_all_once,
)
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(name)s %(message)s")
# Quiet httpx's per-request noise at DEBUG; keep our own loggers verbose
logging.getLogger("httpx").setLevel(logging.INFO)
logging.getLogger("httpcore").setLevel(logging.WARNING)
log = logging.getLogger("watcher")
@@ -18,22 +20,39 @@ log = logging.getLogger("watcher")
async def main() -> None:
# Entry point of the watcher process: loads settings, then loops forever,
# sleeping ``settings.poll_interval`` seconds between cycles.
# NOTE(review): this span is diff residue mixing removed and added lines; the
# lines that use ``nc`` / ``poll_once`` presumably belong to the removed
# single-client implementation. Confirm against the real file.
settings = get_settings()
# NOTE(review): presumably removed — one global client built from static settings.
nc = NextcloudWatcherClient(
base_url=settings.nextcloud_url,
username=settings.nextcloud_user,
password=settings.nextcloud_pass,
)
log.info("Starting watcher (poll_interval=%ds)", settings.poll_interval)
# NOTE(review): presumably removed — startup health wait on the single client.
log.info("Waiting for Nextcloud to become available...")
while not await nc.is_healthy():
await asyncio.sleep(10)
log.info("Nextcloud is ready. Starting poll loop (interval=%ds)", settings.poll_interval)
# Per-band WatcherClient instances; keyed by band_id string
watchers: dict = {}
# Per-band opaque cursors (last seen activity ID, page token, etc.)
cursors: dict[str, str | None] = {}
poll_cycle = 0
while True:
# NOTE(review): presumably removed — old single-client poll with its own error guard.
try:
await poll_once(nc, settings)
except Exception as exc:
log.exception("Poll error: %s", exc)
# Refresh the list of bands (and their storage configs) periodically.
# Always refreshes on the first cycle; afterwards every
# ``config_refresh_interval`` cycles when that setting is positive.
refresh = (
poll_cycle == 0
or (settings.config_refresh_interval > 0 and poll_cycle % settings.config_refresh_interval == 0)
)
if refresh:
log.info("Refreshing storage configs from API…")
configs = await fetch_nextcloud_configs(settings)
if configs:
# Watchers are rebuilt from scratch on every successful refresh.
watchers = build_nc_watchers(configs, settings)
# Preserve cursors for bands that were already being watched
# NOTE(review): entries for bands no longer in ``watchers`` are never removed from ``cursors``.
for band_id in watchers:
cursors.setdefault(band_id, None)
log.info("Watching %d Nextcloud band(s): %s", len(watchers), list(watchers))
else:
# NOTE(review): ``watchers`` is left untouched here, so previously built
# watchers keep being polled even though the message says there are none.
log.warning("No Nextcloud storage configs received — no bands to watch")
if watchers:
try:
await poll_all_once(watchers, cursors, settings)
except Exception as exc:
log.exception("Unexpected error in poll loop: %s", exc)
poll_cycle += 1
await asyncio.sleep(settings.poll_interval)

View File

@@ -0,0 +1,116 @@
"""Nextcloud WatcherClient implementation.
Polls the Nextcloud Activity API to detect new / modified audio files.
The cursor is the last seen ``activity_id`` (stored as a string for
protocol compatibility).
"""
from __future__ import annotations
import logging
from pathlib import Path
from watcher.nc_client import NextcloudWatcherClient
from watcher.protocol import FileEvent
log = logging.getLogger("watcher.nc_watcher")
# Nextcloud Activity API filter sets used to recognise upload activities.
#
# NC 22+ returns: type="file_created"|"file_changed" (subject is human-readable)
# NC <22 returns: type="files" (subject is a machine key like "created_self")
#
# An activity counts as an upload when EITHER its type or its subject matches,
# so both styles are accepted.
_UPLOAD_TYPES = {"file_created", "file_changed"}
_UPLOAD_SUBJECTS = {
"created_by",
"changed_by",
"created_public",
"created_self",
"changed_self",
}
class NextcloudWatcher:
    """WatcherClient implementation backed by the Nextcloud Activity API.

    One instance watches a single band's Nextcloud account and converts
    upload activities for audio files into ``FileEvent`` objects.
    """

    def __init__(
        self,
        band_id: str,
        nc_url: str,
        nc_username: str,
        nc_app_password: str,
        audio_extensions: list[str],
    ) -> None:
        """Store the band id and build the underlying Nextcloud client.

        Args:
            band_id: Identifier of the band; copied onto every emitted event.
            nc_url: Base URL handed to ``NextcloudWatcherClient``.
            nc_username: Nextcloud user name; also used to strip path prefixes.
            nc_app_password: Password handed to ``NextcloudWatcherClient``.
            audio_extensions: Suffixes (with leading dot) that count as audio.
        """
        self.band_id = band_id
        self._audio_extensions = audio_extensions
        self._nc = NextcloudWatcherClient(
            base_url=nc_url,
            username=nc_username,
            password=nc_app_password,
        )

    @staticmethod
    def _event_type(activity_type: str, subject: str) -> str:
        """Classify an upload activity as ``"created"`` or ``"modified"``.

        Newer Nextcloud versions encode the kind in the activity type
        ("file_created" / "file_changed"). Older ones report a generic type
        and put a machine key such as "created_self" in the subject, so the
        subject is consulted only when the type carries no information.
        """
        if "created" in activity_type:
            return "created"
        if "changed" in activity_type:
            return "modified"
        return "created" if subject.startswith("created_") else "modified"

    async def poll_changes(self, cursor: str | None) -> tuple[list[FileEvent], str]:
        """Return ``(events, new_cursor)`` for activities newer than ``cursor``.

        Args:
            cursor: Last seen activity id as a string, or ``None``/empty to
                start from id 0.

        Returns:
            The audio upload events found, plus the highest activity id seen
            (as a string). The cursor advances past every activity returned
            by the API, including those that are filtered out.
        """
        # NOTE(review): the WatcherClient docstring says None means "only new
        # events", but None maps to since_id=0 here. Whether that replays
        # history depends on get_activities(), which is not visible here.
        since_id = int(cursor) if cursor else 0
        activities = await self._nc.get_activities(since_id=since_id)
        events: list[FileEvent] = []
        new_cursor = cursor or "0"
        for activity in activities:
            activity_id = int(activity.get("activity_id", 0))
            new_cursor = str(max(int(new_cursor), activity_id))
            activity_type = activity.get("type", "")
            subject = activity.get("subject", "")
            raw_path = _extract_file_path(activity)
            if raw_path is None:
                continue
            nc_path = _normalize_path(raw_path, self._nc.username)
            log.debug("Activity %d type=%r path=%r", activity_id, activity_type, nc_path)
            if not _is_audio(nc_path, self._audio_extensions):
                continue
            # Either the type (newer NC) or the subject key (older NC) may mark an upload.
            if activity_type not in _UPLOAD_TYPES and subject not in _UPLOAD_SUBJECTS:
                continue
            etag = await self._nc.get_file_etag(nc_path)
            events.append(
                FileEvent(
                    band_id=self.band_id,
                    file_path=nc_path,
                    event_type=self._event_type(activity_type, subject),
                    etag=etag,
                )
            )
        return events, new_cursor

    async def is_healthy(self) -> bool:
        """Return the health status reported by the underlying Nextcloud client."""
        return await self._nc.is_healthy()
# ── Helpers ────────────────────────────────────────────────────────────────────
def _extract_file_path(activity: dict) -> str | None:
objects = activity.get("objects", {})
if isinstance(objects, dict):
for _, file_path in objects.items():
if isinstance(file_path, str):
return file_path
return activity.get("object_name") or None
def _normalize_path(raw_path: str, username: str) -> str:
path = raw_path.strip("/")
dav_prefix = f"remote.php/dav/files/{username}/"
if path.startswith(dav_prefix):
return path[len(dav_prefix):]
user_files_prefix = f"{username}/files/"
if path.startswith(user_files_prefix):
return path[len(user_files_prefix):]
if path.startswith("files/"):
return path[len("files/"):]
return path
def _is_audio(path: str, extensions: list[str]) -> bool:
return Path(path).suffix.lower() in extensions

View File

@@ -0,0 +1,42 @@
"""WatcherClient protocol — abstracts provider-specific change-detection APIs.
Each storage provider implements its own change detection:
Nextcloud → Activity API (polling)
Google Drive → Changes API or webhook push
OneDrive → Microsoft Graph subscriptions
Dropbox → Long-poll or webhooks
All implementations must satisfy this protocol so the event loop can treat
them uniformly.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Protocol
@dataclass
class FileEvent:
    """Single file-change notification produced by a WatcherClient."""

    # Identifier of the band the event belongs to.
    band_id: str
    # Provider-relative path (no host, no DAV prefix).
    file_path: str
    # One of 'created', 'modified' or 'deleted'.
    event_type: str
    # ETag of the file if the provider supplied one; otherwise None.
    etag: str | None = None
class WatcherClient(Protocol):
    """Structural interface every provider-specific watcher must satisfy."""

    # Identifier of the band this watcher is responsible for.
    band_id: str

    async def poll_changes(self, cursor: str | None) -> tuple[list[FileEvent], str]:
        """Return ``(events, new_cursor)`` for changes since ``cursor``.

        The cursor is an opaque, implementation-defined string (for example
        an activity ID for Nextcloud or a page token for Google Drive).
        Passing ``None`` means "start from the current position", i.e.
        report only new events.
        """
        ...

    async def is_healthy(self) -> bool:
        """Return True if the storage backend is reachable."""
        ...