Refactor storage to provider-agnostic band-scoped model

Replaces per-member Nextcloud credentials with a BandStorage model that
supports multiple providers. Credentials are Fernet-encrypted at rest; the
worker receives audio via an internal streaming endpoint instead of
accessing storage directly.

- Add BandStorage DB model with partial unique index (one active storage per band)
- Add migrations 0007 (create band_storage) and 0008 (drop old Nextcloud columns)
- Add StorageFactory that builds the correct StorageClient from BandStorage
- Add storage router: connect/nextcloud, OAuth2 authorize/callback, list, disconnect
- Add Fernet encryption helpers in security/encryption.py
- Rewrite watcher for per-band polling via internal API config endpoint
- Update worker to stream audio from API instead of accessing storage directly
- Update frontend: new storage API in bands.ts, rewritten StorageSection,
  simplified band creation modal (no storage step)
- Add STORAGE_ENCRYPTION_KEY to all docker-compose files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mistral Vibe
2026-04-10 23:22:36 +02:00
parent ba22853bc7
commit b2d6b4d113
44 changed files with 1725 additions and 675 deletions

View File

@@ -1,21 +1,27 @@
"""Core nc-scan logic shared by the blocking and streaming endpoints."""
"""Storage scan logic: walk a band's storage folder and import audio files.
Works against any ``StorageClient`` implementation — Nextcloud, Google Drive, etc.
``StorageClient.list_folder`` must return ``FileMetadata`` objects whose ``path``
field is a *provider-relative* path (i.e. the DAV prefix has already been stripped
by the client implementation).
"""
from __future__ import annotations
import logging
from collections.abc import AsyncGenerator
from pathlib import Path
from urllib.parse import unquote
from sqlalchemy.ext.asyncio import AsyncSession
from rehearsalhub.repositories.audio_version import AudioVersionRepository
from rehearsalhub.repositories.rehearsal_session import RehearsalSessionRepository
from rehearsalhub.repositories.song import SongRepository
from rehearsalhub.schemas.audio_version import AudioVersionCreate
from rehearsalhub.schemas.song import SongRead
from rehearsalhub.services.session import extract_session_folder, parse_rehearsal_date
from rehearsalhub.services.song import SongService
from rehearsalhub.storage.nextcloud import NextcloudClient
from rehearsalhub.storage.protocol import StorageClient
log = logging.getLogger(__name__)
@@ -26,72 +32,53 @@ AUDIO_EXTENSIONS = {".mp3", ".wav", ".flac", ".ogg", ".m4a", ".aac", ".opus"}
MAX_SCAN_DEPTH = 3
def _make_relative(dav_prefix: str):
"""Return a function that strips the WebDAV prefix and URL-decodes a href."""
def relative(href: str) -> str:
decoded = unquote(href)
if decoded.startswith(dav_prefix):
return decoded[len(dav_prefix):]
# Strip any leading slash for robustness
return decoded.lstrip("/")
return relative
async def collect_audio_files(
nc: NextcloudClient,
relative: object, # Callable[[str], str]
storage: StorageClient,
folder_path: str,
max_depth: int = MAX_SCAN_DEPTH,
_depth: int = 0,
) -> AsyncGenerator[str, None]:
"""
Recursively yield user-relative audio file paths under folder_path.
"""Recursively yield provider-relative audio file paths under *folder_path*.
Handles any depth:
bands/slug/take.wav depth 0
bands/slug/231015/take.wav depth 1
bands/slug/231015/groove/take.wav depth 2 ← was broken before
``storage.list_folder`` is expected to return ``FileMetadata`` with paths
already normalised to provider-relative form (no host, no DAV prefix).
"""
if _depth > max_depth:
log.debug("Max depth %d exceeded at '%s', stopping recursion", max_depth, folder_path)
return
try:
items = await nc.list_folder(folder_path)
items = await storage.list_folder(folder_path)
except Exception as exc:
log.warning("Could not list folder '%s': %s", folder_path, exc)
return
log.info(
"scan depth=%d folder='%s' entries=%d",
_depth, folder_path, len(items),
)
log.info("scan depth=%d folder='%s' entries=%d", _depth, folder_path, len(items))
for item in items:
rel = relative(item.path) # type: ignore[operator]
if rel.endswith("/"):
# It's a subdirectory — recurse
log.info(" → subdir: %s", rel)
async for subpath in collect_audio_files(nc, relative, rel, max_depth, _depth + 1):
path = item.path.lstrip("/")
if path.endswith("/"):
log.info(" → subdir: %s", path)
async for subpath in collect_audio_files(storage, path, max_depth, _depth + 1):
yield subpath
else:
ext = Path(rel).suffix.lower()
ext = Path(path).suffix.lower()
if ext in AUDIO_EXTENSIONS:
log.info(" → audio file: %s", rel)
yield rel
log.info(" → audio file: %s", path)
yield path
elif ext:
log.debug(" → skip (ext=%s): %s", ext, rel)
log.debug(" → skip (ext=%s): %s", ext, path)
async def scan_band_folder(
db_session: AsyncSession,
nc: NextcloudClient,
storage: StorageClient,
band_id,
band_folder: str,
member_id,
) -> AsyncGenerator[dict, None]:
"""
Async generator that scans band_folder and yields event dicts:
"""Async generator that scans *band_folder* and yields event dicts:
{"type": "progress", "message": str}
{"type": "song", "song": SongRead-dict, "is_new": bool}
{"type": "session", "session": {id, date, label}}
@@ -99,11 +86,9 @@ async def scan_band_folder(
{"type": "done", "stats": {found, imported, skipped}}
{"type": "error", "message": str}
"""
dav_prefix = f"/remote.php/dav/files/{nc._auth[0]}/"
relative = _make_relative(dav_prefix)
session_repo = RehearsalSessionRepository(db_session)
song_repo = SongRepository(db_session)
version_repo = AudioVersionRepository(db_session)
song_svc = SongService(db_session)
found = 0
@@ -112,23 +97,28 @@ async def scan_band_folder(
yield {"type": "progress", "message": f"Scanning {band_folder}"}
async for nc_file_path in collect_audio_files(nc, relative, band_folder):
async for nc_file_path in collect_audio_files(storage, band_folder):
found += 1
song_folder = str(Path(nc_file_path).parent).rstrip("/") + "/"
song_title = Path(nc_file_path).stem
# If the file sits directly inside a dated session folder (YYMMDD/file.wav),
# give it a unique virtual folder so each file becomes its own song rather
# than being merged as a new version of the first file in that folder.
# give it a unique virtual folder so each file becomes its own song.
session_folder_path = extract_session_folder(nc_file_path)
if session_folder_path and session_folder_path.rstrip("/") == song_folder.rstrip("/"):
song_folder = song_folder + song_title + "/"
yield {"type": "progress", "message": f"Checking {Path(nc_file_path).name}"}
# Fetch file metadata (etag + size) — one PROPFIND per file
existing = await version_repo.get_by_nc_file_path(nc_file_path)
if existing is not None:
log.debug("scan: skipping already-registered '%s' (version %s)", nc_file_path, existing.id)
skipped += 1
yield {"type": "skipped", "path": nc_file_path, "reason": "already imported"}
continue
try:
meta = await nc.get_file_metadata(nc_file_path)
meta = await storage.get_file_metadata(nc_file_path)
etag = meta.etag
except Exception as exc:
log.error("Metadata fetch failed for '%s': %s", nc_file_path, exc, exc_info=True)
@@ -137,7 +127,6 @@ async def scan_band_folder(
continue
try:
# Resolve or create a RehearsalSession from a YYMMDD folder segment
rehearsal_date = parse_rehearsal_date(nc_file_path)
rehearsal_session_id = None
if rehearsal_date:
@@ -154,7 +143,6 @@ async def scan_band_folder(
},
}
# Find or create the Song record
song = await song_repo.get_by_nc_folder_path(song_folder)
if song is None:
song = await song_repo.get_by_title_and_band(band_id, song_title)
@@ -173,7 +161,6 @@ async def scan_band_folder(
elif rehearsal_session_id and song.session_id is None:
song = await song_repo.update(song, session_id=rehearsal_session_id)
# Register the audio version
version = await song_svc.register_version(
song.id,
AudioVersionCreate(
@@ -187,7 +174,9 @@ async def scan_band_folder(
log.info("Imported '%s' as version %s for song '%s'", nc_file_path, version.id, song.title)
imported += 1
read = SongRead.model_validate(song).model_copy(update={"version_count": 1, "session_id": rehearsal_session_id})
read = SongRead.model_validate(song).model_copy(
update={"version_count": 1, "session_id": rehearsal_session_id}
)
yield {"type": "song", "song": read.model_dump(mode="json"), "is_new": is_new}
except Exception as exc: