Refactor storage to provider-agnostic band-scoped model
Replaces per-member Nextcloud credentials with a BandStorage model that supports multiple providers. Credentials are Fernet-encrypted at rest; the worker receives audio via an internal streaming endpoint instead of direct storage access.

- Add BandStorage DB model with partial unique index (one active per band)
- Add migrations 0007 (create band_storage) and 0008 (drop old nc columns)
- Add StorageFactory that builds the correct StorageClient from BandStorage
- Add storage router: connect/nextcloud, OAuth2 authorize/callback, list, disconnect
- Add Fernet encryption helpers in security/encryption.py
- Rewrite watcher for per-band polling via internal API config endpoint
- Update worker to stream audio from API instead of accessing storage directly
- Update frontend: new storage API in bands.ts, rewritten StorageSection, simplified band creation modal (no storage step)
- Add STORAGE_ENCRYPTION_KEY to all docker-compose files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,11 +5,10 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
class WatcherSettings(BaseSettings):
|
||||
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
|
||||
|
||||
nextcloud_url: str = "http://nextcloud"
|
||||
nextcloud_user: str = "ncadmin"
|
||||
nextcloud_pass: str = ""
|
||||
|
||||
api_url: str = "http://api:8000"
|
||||
# Shared secret for calling internal API endpoints
|
||||
internal_secret: str = "dev-change-me-in-production"
|
||||
|
||||
redis_url: str = "redis://localhost:6379/0"
|
||||
job_queue_key: str = "rh:jobs"
|
||||
|
||||
@@ -18,6 +17,10 @@ class WatcherSettings(BaseSettings):
|
||||
# File extensions to watch
|
||||
audio_extensions: list[str] = [".wav", ".mp3", ".flac", ".aac", ".ogg", ".m4a", ".opus"]
|
||||
|
||||
# How often (in poll cycles) to refresh the list of bands from the API.
|
||||
# 0 = only on startup, N = every N poll cycles.
|
||||
config_refresh_interval: int = 10
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_settings() -> WatcherSettings:
|
||||
|
||||
@@ -1,149 +1,93 @@
|
||||
"""Event loop: poll Nextcloud activity, detect audio uploads, push to API."""
|
||||
"""Event loop: fetch per-band storage configs from the API, detect audio uploads."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from watcher.config import WatcherSettings
|
||||
from watcher.nc_client import NextcloudWatcherClient
|
||||
from watcher.nc_watcher import NextcloudWatcher
|
||||
from watcher.protocol import FileEvent, WatcherClient
|
||||
|
||||
log = logging.getLogger("watcher.event_loop")
|
||||
|
||||
# Persist last seen activity ID in-process (good enough for a POC)
|
||||
_last_activity_id: int = 0
|
||||
|
||||
# Nextcloud Activity API v2 filter sets.
|
||||
#
|
||||
# NC 22+ returns: type="file_created"|"file_changed" (subject is human-readable)
|
||||
# NC <22 returns: type="files" (subject is a machine key like "created_self")
|
||||
#
|
||||
# We accept either style so the watcher works across NC versions.
|
||||
_UPLOAD_TYPES = {"file_created", "file_changed"}
|
||||
|
||||
_UPLOAD_SUBJECTS = {
|
||||
"created_by",
|
||||
"changed_by",
|
||||
"created_public",
|
||||
"created_self",
|
||||
"changed_self",
|
||||
}
|
||||
|
||||
|
||||
def is_audio_file(path: str, extensions: list[str]) -> bool:
    """Tell whether ``path`` ends in one of the given audio extensions.

    The suffix of ``path`` (including its leading dot) is lower-cased before
    the membership test; the entries of ``extensions`` are compared as given.
    """
    suffix = Path(path).suffix
    return suffix.lower() in extensions
|
||||
|
||||
|
||||
def normalize_nc_path(raw_path: str, username: str) -> str:
    """Reduce a Nextcloud activity/WebDAV path to a plain user-relative path.

    Leading and trailing slashes are removed first, then the first matching
    prefix (checked in this order) is cut off:

        remote.php/dav/files/<username>/   (WebDAV form)
        <username>/files/                  (activity app form)
        files/

    A path that matches none of them is returned with only the surrounding
    slashes stripped (it is assumed to be relative already).
    """
    trimmed = raw_path.strip("/")
    known_prefixes = (
        f"remote.php/dav/files/{username}/",
        f"{username}/files/",
        "files/",
    )
    for prefix in known_prefixes:
        if trimmed.startswith(prefix):
            return trimmed[len(prefix):]
    return trimmed
|
||||
|
||||
|
||||
|
||||
def extract_nc_file_path(activity: dict[str, Any]) -> str | None:
    """Pull the file path out of a Nextcloud activity event.

    The first string value found in the ``objects`` mapping wins. When
    ``objects`` is not a dict or holds no string value, the ``object_name``
    field is used instead; ``None`` is returned if that is missing or empty.
    """
    candidates = activity.get("objects", {})
    if isinstance(candidates, dict):
        first_path = next(
            (value for value in candidates.values() if isinstance(value, str)),
            None,
        )
        if first_path is not None:
            return first_path
    # Fallback: older NC versions put it in object_name
    fallback = activity.get("object_name")
    return fallback if fallback else None
|
||||
|
||||
|
||||
async def register_version_with_api(nc_file_path: str, nc_file_etag: str | None, api_url: str) -> bool:
|
||||
async def fetch_nextcloud_configs(settings: WatcherSettings) -> list[dict]:
    """Fetch active Nextcloud configs for all bands from the internal API.

    Sends a GET to ``/api/v1/internal/storage/nextcloud-watch-configs`` under
    ``settings.api_url``, authenticated with the ``X-Internal-Token`` header
    taken from ``settings.internal_secret``.

    Returns:
        The decoded JSON list on success. An empty list when the request
        fails, the API answers with an error status, or the payload is not a
        JSON list; the failure is logged as a warning and never raised.
    """
    url = f"{settings.api_url}/api/v1/internal/storage/nextcloud-watch-configs"
    headers = {"X-Internal-Token": settings.internal_secret}
    try:
        async with httpx.AsyncClient(timeout=15.0) as c:
            resp = await c.get(url, headers=headers)
            resp.raise_for_status()
            payload = resp.json()
    except Exception as exc:
        log.warning("Failed to fetch NC configs from API: %s", exc)
        return []

    if not isinstance(payload, list):
        # Only the type is logged: the payload may carry storage credentials.
        log.warning(
            "Failed to fetch NC configs from API: expected a JSON list, got %s",
            type(payload).__name__,
        )
        return []
    return payload
|
||||
|
||||
|
||||
def build_nc_watchers(
    configs: list[dict],
    settings: WatcherSettings,
) -> dict[str, NextcloudWatcher]:
    """Build one NextcloudWatcher per band from the API config payload.

    Args:
        configs: Config entries; each is read for the keys ``band_id``,
            ``nc_url``, ``nc_username`` and ``nc_app_password``.
        settings: Supplies ``audio_extensions`` for every watcher.

    Returns:
        Mapping of ``band_id`` to its watcher. Entries that are malformed
        (e.g. a missing key) or whose watcher cannot be constructed are logged
        and left out, so one bad entry never blocks the other bands.
    """
    watchers: dict[str, NextcloudWatcher] = {}
    for cfg in configs:
        band_id = None
        try:
            # Read inside the try so a missing/invalid band_id only skips
            # this entry instead of aborting the whole build.
            band_id = cfg["band_id"]
            watchers[band_id] = NextcloudWatcher(
                band_id=band_id,
                nc_url=cfg["nc_url"],
                nc_username=cfg["nc_username"],
                nc_app_password=cfg["nc_app_password"],
                audio_extensions=settings.audio_extensions,
            )
        except Exception as exc:
            # The entry itself is not logged: it contains the app password.
            log.error("Failed to create watcher for band %s: %s", band_id, exc)
    return watchers
|
||||
|
||||
|
||||
async def register_event_with_api(event: FileEvent, settings: WatcherSettings) -> bool:
    """Forward a FileEvent to the API's internal nc-upload endpoint.

    POSTs ``{"nc_file_path", "nc_file_etag"}`` built from ``event`` to
    ``/api/v1/internal/nc-upload`` under ``settings.api_url``, authenticated
    with the ``X-Internal-Token`` header.

    Returns:
        ``True`` when the API answers 200 or 201. ``False`` for any other
        status or when the request raises; both cases are logged as warnings
        and nothing is propagated to the caller.
    """
    payload = {"nc_file_path": event.file_path, "nc_file_etag": event.etag}
    headers = {"X-Internal-Token": settings.internal_secret}
    try:
        async with httpx.AsyncClient(timeout=15.0) as c:
            resp = await c.post(
                f"{settings.api_url}/api/v1/internal/nc-upload",
                json=payload,
                headers=headers,
            )
            if resp.status_code in (200, 201):
                log.info("Registered event via internal API: %s", event.file_path)
                return True
            log.warning(
                "Internal API returned %d for %s: %s",
                resp.status_code, event.file_path, resp.text[:200],
            )
            return False
    except Exception as exc:
        log.warning("Failed to register event with API for %s: %s", event.file_path, exc)
        return False
|
||||
|
||||
|
||||
async def poll_once(nc_client: NextcloudWatcherClient, settings: WatcherSettings) -> None:
    """Run one polling pass over the Nextcloud activity feed.

    Activities newer than the module-level ``_last_activity_id`` are fetched
    from ``nc_client``. Each one advances that cursor, and those that point at
    an audio file and look like an upload are registered with the API via
    ``register_version_with_api``.
    """
    global _last_activity_id

    batch = await nc_client.get_activities(since_id=_last_activity_id)
    if not batch:
        log.info("No new activities since id=%d", _last_activity_id)
        return

    log.info("Received %d activities (since id=%d)", len(batch), _last_activity_id)

    for entry in batch:
        entry_id = int(entry.get("activity_id", 0))
        kind = entry.get("type", "")
        subject = entry.get("subject", "")
        raw_path = extract_nc_file_path(entry)

        # The cursor moves forward even for events that end up being skipped
        if entry_id > _last_activity_id:
            _last_activity_id = entry_id

        log.info(
            "Activity id=%d type=%r subject=%r raw_path=%r",
            entry_id, kind, subject, raw_path,
        )

        if raw_path is None:
            log.info(" → skip: no file path in activity payload")
            continue

        nc_path = normalize_nc_path(raw_path, nc_client.username)
        log.info(" → normalized path: %r", nc_path)

        # Anything that is not an audio file is dropped right away
        if not is_audio_file(nc_path, settings.audio_extensions):
            extension = Path(nc_path).suffix.lower()
            log.info(" → skip: not an audio file (ext=%s)", extension or "<none>")
            continue

        looks_like_upload = kind in _UPLOAD_TYPES or subject in _UPLOAD_SUBJECTS
        if not looks_like_upload:
            log.info(
                " → skip: type=%r subject=%r is not a file upload event",
                kind, subject,
            )
            continue

        log.info(" → MATCH — registering audio upload: %s", nc_path)
        etag = await nc_client.get_file_etag(nc_path)
        registered = await register_version_with_api(nc_path, etag, settings.api_url)
        if not registered:
            log.warning(" → FAILED to register upload for activity %d (%s)", entry_id, nc_path)
|
||||
async def poll_all_once(
    watchers: dict[str, WatcherClient],
    cursors: dict[str, str | None],
    settings: WatcherSettings,
) -> None:
    """Poll each band's watcher once and push its new events to the API.

    ``cursors`` is updated in place with the cursor each watcher returns. An
    exception raised while handling one band is logged with its traceback and
    does not stop the remaining bands from being polled.
    """
    for band_id, client in watchers.items():
        previous = cursors.get(band_id)
        try:
            found, next_cursor = await client.poll_changes(previous)
            cursors[band_id] = next_cursor
            if found:
                log.info("Band %s: %d new event(s)", band_id, len(found))
                for item in found:
                    await register_event_with_api(item, settings)
            else:
                log.debug("Band %s: no new events (cursor=%s)", band_id, next_cursor)
        except Exception as exc:
            log.exception("Poll error for band %s: %s", band_id, exc)
|
||||
|
||||
@@ -6,11 +6,13 @@ import asyncio
|
||||
import logging
|
||||
|
||||
from watcher.config import get_settings
|
||||
from watcher.event_loop import poll_once
|
||||
from watcher.nc_client import NextcloudWatcherClient
|
||||
from watcher.event_loop import (
|
||||
build_nc_watchers,
|
||||
fetch_nextcloud_configs,
|
||||
poll_all_once,
|
||||
)
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(name)s %(message)s")
|
||||
# Quiet httpx's per-request noise at DEBUG; keep our own loggers verbose
|
||||
logging.getLogger("httpx").setLevel(logging.INFO)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
log = logging.getLogger("watcher")
|
||||
@@ -18,22 +20,39 @@ log = logging.getLogger("watcher")
|
||||
|
||||
async def main() -> None:
    """Run the watcher: refresh per-band storage configs and poll forever.

    On the first cycle, and then every ``settings.config_refresh_interval``
    cycles (``0`` means startup only), the Nextcloud watch configs are fetched
    from the API and the set of per-band watchers is rebuilt. Every cycle the
    current watchers are polled once, then the loop sleeps for
    ``settings.poll_interval`` seconds. Errors in either step are logged and
    the loop carries on.
    """
    settings = get_settings()
    log.info("Starting watcher (poll_interval=%ds)", settings.poll_interval)

    # Per-band WatcherClient instances; keyed by band_id string
    watchers: dict = {}
    # Per-band opaque cursors (last seen activity ID, page token, etc.)
    cursors: dict[str, str | None] = {}
    poll_cycle = 0

    while True:
        # Refresh the list of bands (and their storage configs) periodically.
        refresh = (
            poll_cycle == 0
            or (settings.config_refresh_interval > 0 and poll_cycle % settings.config_refresh_interval == 0)
        )
        if refresh:
            log.info("Refreshing storage configs from API…")
            try:
                configs = await fetch_nextcloud_configs(settings)
                if configs:
                    watchers = build_nc_watchers(configs, settings)
                    # Preserve cursors for bands that were already being watched
                    for band_id in watchers:
                        cursors.setdefault(band_id, None)
                    log.info("Watching %d Nextcloud band(s): %s", len(watchers), list(watchers))
                else:
                    # An empty result leaves the previous watcher set in place.
                    log.warning("No Nextcloud storage configs received — no bands to watch")
            except Exception as exc:
                # A bad payload must not end the process; keep the watchers
                # from the last successful refresh and try again later.
                log.exception("Failed to refresh storage configs: %s", exc)

        if watchers:
            try:
                await poll_all_once(watchers, cursors, settings)
            except Exception as exc:
                log.exception("Unexpected error in poll loop: %s", exc)

        poll_cycle += 1
        await asyncio.sleep(settings.poll_interval)
|
||||
|
||||
|
||||
|
||||
116
watcher/src/watcher/nc_watcher.py
Normal file
116
watcher/src/watcher/nc_watcher.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""Nextcloud WatcherClient implementation.
|
||||
|
||||
Polls the Nextcloud Activity API to detect new / modified audio files.
|
||||
The cursor is the last seen ``activity_id`` (stored as a string for
|
||||
protocol compatibility).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from watcher.nc_client import NextcloudWatcherClient
|
||||
from watcher.protocol import FileEvent
|
||||
|
||||
log = logging.getLogger("watcher.nc_watcher")
|
||||
|
||||
# Nextcloud Activity API filter sets.
#
# NC 22+ returns: type="file_created"|"file_changed" (subject is human-readable)
# NC <22 returns: type="files" (subject is a machine key like "created_self")
#
# An activity is treated as an upload when either its type or its subject
# matches, so the watcher works across NC versions.
_UPLOAD_TYPES = {"file_created", "file_changed"}

_UPLOAD_SUBJECTS = {
    "created_by",
    "changed_by",
    "created_public",
    "created_self",
    "changed_self",
}
|
||||
|
||||
|
||||
class NextcloudWatcher:
    """WatcherClient implementation backed by the Nextcloud Activity API.

    One instance watches the Nextcloud account configured for a single band.
    The polling cursor is the highest ``activity_id`` seen so far, carried as
    a string for protocol compatibility.
    """

    def __init__(
        self,
        band_id: str,
        nc_url: str,
        nc_username: str,
        nc_app_password: str,
        audio_extensions: list[str],
    ) -> None:
        """Create the watcher and its underlying Nextcloud client.

        Args:
            band_id: Identifier stamped on every emitted ``FileEvent``.
            nc_url: Base URL handed to ``NextcloudWatcherClient``.
            nc_username: Nextcloud user name; also used to strip path prefixes.
            nc_app_password: Password handed to ``NextcloudWatcherClient``.
            audio_extensions: File suffixes (with leading dot) to report.
        """
        self.band_id = band_id
        self._audio_extensions = audio_extensions
        self._nc = NextcloudWatcherClient(
            base_url=nc_url,
            username=nc_username,
            password=nc_app_password,
        )

    async def poll_changes(self, cursor: str | None) -> tuple[list[FileEvent], str]:
        """Return audio upload events newer than ``cursor`` and the new cursor.

        Args:
            cursor: Last seen activity ID as a string. ``None`` or ``""`` is
                treated as ``0``.
                NOTE(review): the WatcherClient protocol describes ``None`` as
                "only new events"; whether ``since_id=0`` behaves that way
                depends on ``NextcloudWatcherClient.get_activities`` — confirm.

        Returns:
            ``(events, new_cursor)``. ``new_cursor`` is the highest activity
            ID seen (never lower than the incoming cursor) and advances for
            every activity, including the ones that are filtered out.
        """
        since_id = int(cursor) if cursor else 0
        activities = await self._nc.get_activities(since_id=since_id)

        events: list[FileEvent] = []
        latest_id = since_id

        for activity in activities:
            activity_id = int(activity.get("activity_id", 0))
            latest_id = max(latest_id, activity_id)

            activity_type = activity.get("type", "")
            subject = activity.get("subject", "")
            raw_path = _extract_file_path(activity)

            if raw_path is None:
                continue

            nc_path = _normalize_path(raw_path, self._nc.username)
            log.debug("Activity %d type=%r path=%r", activity_id, activity_type, nc_path)

            if not _is_audio(nc_path, self._audio_extensions):
                continue

            if activity_type not in _UPLOAD_TYPES and subject not in _UPLOAD_SUBJECTS:
                continue

            # Activities matched through a machine-key subject (e.g.
            # "created_self") do not carry "created" in their type, so the
            # subject has to be consulted as well to classify the event.
            is_creation = "created" in activity_type or (
                subject in _UPLOAD_SUBJECTS and subject.startswith("created")
            )

            etag = await self._nc.get_file_etag(nc_path)
            events.append(
                FileEvent(
                    band_id=self.band_id,
                    file_path=nc_path,
                    event_type="created" if is_creation else "modified",
                    etag=etag,
                )
            )

        return events, str(latest_id)

    async def is_healthy(self) -> bool:
        """Report whether the underlying Nextcloud client considers itself healthy."""
        return await self._nc.is_healthy()
|
||||
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _extract_file_path(activity: dict) -> str | None:
|
||||
objects = activity.get("objects", {})
|
||||
if isinstance(objects, dict):
|
||||
for _, file_path in objects.items():
|
||||
if isinstance(file_path, str):
|
||||
return file_path
|
||||
return activity.get("object_name") or None
|
||||
|
||||
|
||||
def _normalize_path(raw_path: str, username: str) -> str:
|
||||
path = raw_path.strip("/")
|
||||
dav_prefix = f"remote.php/dav/files/{username}/"
|
||||
if path.startswith(dav_prefix):
|
||||
return path[len(dav_prefix):]
|
||||
user_files_prefix = f"{username}/files/"
|
||||
if path.startswith(user_files_prefix):
|
||||
return path[len(user_files_prefix):]
|
||||
if path.startswith("files/"):
|
||||
return path[len("files/"):]
|
||||
return path
|
||||
|
||||
|
||||
def _is_audio(path: str, extensions: list[str]) -> bool:
|
||||
return Path(path).suffix.lower() in extensions
|
||||
42
watcher/src/watcher/protocol.py
Normal file
42
watcher/src/watcher/protocol.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""WatcherClient protocol — abstracts provider-specific change-detection APIs.
|
||||
|
||||
Each storage provider implements its own change detection:
|
||||
Nextcloud → Activity API (polling)
|
||||
Google Drive → Changes API or webhook push
|
||||
OneDrive → Microsoft Graph subscriptions
|
||||
Dropbox → Long-poll or webhooks
|
||||
|
||||
All implementations must satisfy this protocol so the event loop can treat
|
||||
them uniformly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
@dataclass
class FileEvent:
    """A file-change event emitted by a WatcherClient.

    Attributes:
        band_id: Identifier of the band whose watcher produced the event.
        file_path: Provider-relative path (no host, no DAV prefix).
        event_type: One of ``'created'``, ``'modified'`` or ``'deleted'``.
        etag: Entity tag of the file when the provider supplied one,
            otherwise ``None`` (the default).
    """

    band_id: str
    file_path: str
    event_type: str
    etag: str | None = None
|
||||
|
||||
|
||||
class WatcherClient(Protocol):
    """Structural interface that every provider-specific watcher satisfies."""

    band_id: str

    async def poll_changes(self, cursor: str | None) -> tuple[list[FileEvent], str]:
        """Collect the events that occurred after ``cursor``.

        The cursor is an opaque, implementation-defined string (for example
        an activity ID for Nextcloud or a page token for Google Drive).
        Passing ``None`` means "start from the current position", i.e. only
        new events are reported.

        Returns:
            A ``(events, new_cursor)`` pair; feed ``new_cursor`` into the
            next call.
        """
        ...

    async def is_healthy(self) -> bool:
        """Tell whether the storage backend can currently be reached."""
        ...
|
||||
Reference in New Issue
Block a user