feat: incremental SSE scan, recursive NC traversal, custom folder support

- nc_scan.py: recursive collect_audio_files (fixes depth-1 bug); scan_band_folder
  yields ndjson events (progress/song/session/skipped/done) for streaming
- songs.py: replace old flat scan with scan_band_folder; add GET nc-scan/stream
  endpoint using _member_from_request so ?token= auth works for fetch-based SSE
- BandPage.tsx: scan button now consumes ndjson stream via fetch+ReadableStream;
  sessions/unattributed invalidated as each song/session event arrives
- session.py: add extract_session_folder() for YYMMDD path extraction
- rehearsal_session.py: get_or_create uses begin_nested() savepoint to handle races
- band.py: add get_by_nc_folder_prefix() for custom nc_folder_path band lookup
- internal.py: nc-upload falls back to prefix match when slug lookup fails
- event_loop.py: remove hardcoded bands/ guard; let internal API handle filtering

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Steffen Schuhmann
2026-03-29 15:09:42 +02:00
parent dc6dd9dcfd
commit 7cad3e544a
8 changed files with 393 additions and 204 deletions

View File

@@ -0,0 +1,196 @@
"""Core nc-scan logic shared by the blocking and streaming endpoints."""
from __future__ import annotations
import logging
from pathlib import Path
from typing import AsyncGenerator
from urllib.parse import unquote
from sqlalchemy.ext.asyncio import AsyncSession
from rehearsalhub.db.models import Member
from rehearsalhub.repositories.audio_version import AudioVersionRepository
from rehearsalhub.repositories.rehearsal_session import RehearsalSessionRepository
from rehearsalhub.repositories.song import SongRepository
from rehearsalhub.schemas.audio_version import AudioVersionCreate
from rehearsalhub.schemas.song import SongRead
from rehearsalhub.services.session import extract_session_folder, parse_rehearsal_date
from rehearsalhub.services.song import SongService
from rehearsalhub.storage.nextcloud import NextcloudClient
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# File suffixes (lower-case, with leading dot) that the scanner treats as audio.
# Suffixes are lower-cased before the membership test, so matching is
# case-insensitive.
AUDIO_EXTENSIONS = {".mp3", ".wav", ".flac", ".ogg", ".m4a", ".aac", ".opus"}
# Maximum folder depth to recurse into below the band root.
# Depth 0 = band root, 1 = YYMMDD folder, 2 = song subfolder, 3 = safety margin.
MAX_SCAN_DEPTH = 3
def _make_relative(dav_prefix: str):
"""Return a function that strips the WebDAV prefix and URL-decodes a href."""
def relative(href: str) -> str:
decoded = unquote(href)
if decoded.startswith(dav_prefix):
return decoded[len(dav_prefix):]
# Strip any leading slash for robustness
return decoded.lstrip("/")
return relative
async def collect_audio_files(
    nc: NextcloudClient,
    relative: object,  # Callable[[str], str]
    folder_path: str,
    max_depth: int = MAX_SCAN_DEPTH,
    _depth: int = 0,
) -> AsyncGenerator[str, None]:
    """
    Recursively yield user-relative audio file paths under folder_path.

    Handles any depth up to max_depth:
        bands/slug/take.wav                 depth 0
        bands/slug/231015/take.wav          depth 1
        bands/slug/231015/groove/take.wav   depth 2

    Args:
        nc: client used to list folder contents via ``list_folder``.
        relative: callable mapping an item's ``path`` to a user-relative path;
            results ending in "/" are treated as sub-directories.
        folder_path: user-relative folder to list.
        max_depth: deepest recursion level that is still listed.
        _depth: current recursion level (internal; callers leave it at 0).

    Yields:
        User-relative paths of files whose suffix is in AUDIO_EXTENSIONS.

    A folder that cannot be listed is logged and skipped rather than raising,
    so one unreadable sub-folder does not abort the whole scan.
    """
    if _depth > max_depth:
        log.debug("Max depth %d exceeded at '%s', stopping recursion", max_depth, folder_path)
        return

    try:
        items = await nc.list_folder(folder_path)
    except Exception as exc:
        # Best-effort scan: report and move on.
        log.warning("Could not list folder '%s': %s", folder_path, exc)
        return

    log.info(
        "scan depth=%d folder='%s' entries=%d",
        _depth, folder_path, len(items),
    )

    current = folder_path.strip("/")
    for item in items:
        rel = relative(item.path)  # type: ignore[operator]
        if rel.endswith("/"):
            if rel.strip("/") == current:
                # A WebDAV PROPFIND with Depth: 1 lists the collection itself.
                # If list_folder passes that entry through (not visible from
                # here), recursing into it would re-list this folder and yield
                # every file several times.
                continue
            # It's a subdirectory — recurse
            log.info(" → subdir: %s", rel)
            async for subpath in collect_audio_files(nc, relative, rel, max_depth, _depth + 1):
                yield subpath
        else:
            ext = Path(rel).suffix.lower()
            if ext in AUDIO_EXTENSIONS:
                log.info(" → audio file: %s", rel)
                yield rel
            elif ext:
                log.debug(" → skip (ext=%s): %s", ext, rel)
async def scan_band_folder(
    db_session: AsyncSession,
    nc: NextcloudClient,
    band_id,
    band_folder: str,
    member_id,
) -> AsyncGenerator[dict, None]:
    """
    Async generator that scans band_folder and yields event dicts:

        {"type": "progress", "message": str}
        {"type": "song", "song": SongRead-dict, "is_new": bool}
        {"type": "session", "session": {id, date, label, nc_folder_path}}
        {"type": "skipped", "path": str, "reason": str}
        {"type": "done", "stats": {found, imported, skipped}}

    Every audio file found ends up as either imported or skipped: "skipped"
    counts files that are already registered (etag match) as well as files
    whose metadata could not be fetched.

    This generator does not yield an "error" event itself. Exceptions raised
    outside the metadata lookup (repository or service calls) propagate to the
    caller.

    Args:
        db_session: database session handed to the repositories and SongService.
        nc: Nextcloud client used for listing folders and reading file metadata.
        band_id: id of the band that new songs and sessions are attached to.
        band_folder: user-relative folder to scan.
        member_id: member recorded as creator of new songs and versions.
    """
    dav_prefix = f"/remote.php/dav/files/{nc._auth[0]}/"
    relative = _make_relative(dav_prefix)

    version_repo = AudioVersionRepository(db_session)
    session_repo = RehearsalSessionRepository(db_session)
    song_repo = SongRepository(db_session)
    song_svc = SongService(db_session)

    found = 0
    imported = 0
    skipped = 0

    yield {"type": "progress", "message": f"Scanning {band_folder}"}

    async for nc_file_path in collect_audio_files(nc, relative, band_folder):
        found += 1
        file_path = Path(nc_file_path)
        song_folder = str(file_path.parent).rstrip("/") + "/"
        song_title = file_path.stem
        yield {"type": "progress", "message": f"Checking {file_path.name}"}

        # Fetch file metadata (etag + size) — one PROPFIND per file
        try:
            meta = await nc.get_file_metadata(nc_file_path)
            etag = meta.etag
        except Exception as exc:
            log.warning("Metadata error for '%s': %s", nc_file_path, exc)
            # Count the failure so the final stats agree with the emitted
            # "skipped" events.
            skipped += 1
            yield {"type": "skipped", "path": nc_file_path, "reason": f"metadata error: {exc}"}
            continue

        # Skip if this exact version is already indexed
        if etag and await version_repo.get_by_etag(etag):
            log.info("Already registered (etag match): %s", nc_file_path)
            skipped += 1
            yield {"type": "skipped", "path": nc_file_path, "reason": "already registered"}
            continue

        # Resolve or create a RehearsalSession from a YYMMDD folder segment
        rehearsal_date = parse_rehearsal_date(nc_file_path)
        rehearsal_session_id = None
        if rehearsal_date:
            session_folder = extract_session_folder(nc_file_path) or song_folder
            rs = await session_repo.get_or_create(band_id, rehearsal_date, session_folder)
            rehearsal_session_id = rs.id
            yield {
                "type": "session",
                "session": {
                    "id": str(rs.id),
                    "date": rs.date.isoformat(),
                    "label": rs.label,
                    "nc_folder_path": rs.nc_folder_path,
                },
            }

        # Find or create the Song record: folder match first, then title match
        song = await song_repo.get_by_nc_folder_path(song_folder)
        if song is None:
            song = await song_repo.get_by_title_and_band(band_id, song_title)
        is_new = song is None
        if is_new:
            log.info("Creating song '%s' folder='%s'", song_title, song_folder)
            song = await song_repo.create(
                band_id=band_id,
                session_id=rehearsal_session_id,
                title=song_title,
                status="jam",
                notes=None,
                nc_folder_path=song_folder,
                created_by=member_id,
            )
        elif rehearsal_session_id and song.session_id is None:
            # Attach an existing, so far unattributed song to the session.
            song = await song_repo.update(song, session_id=rehearsal_session_id)

        # Register the audio version
        await song_svc.register_version(
            song.id,
            AudioVersionCreate(
                nc_file_path=nc_file_path,
                nc_file_etag=etag,
                format=file_path.suffix.lstrip(".").lower(),
                file_size_bytes=meta.size,
            ),
            member_id,
        )
        imported += 1

        read = SongRead.model_validate(song, update={"version_count": 1, "session_id": rehearsal_session_id})
        yield {"type": "song", "song": read.model_dump(mode="json"), "is_new": is_new}

    yield {
        "type": "done",
        "stats": {"found": found, "imported": imported, "skipped": skipped},
    }

View File

@@ -41,7 +41,20 @@ def parse_rehearsal_date(nc_file_path: str) -> date | None:
return None
def nc_folder_for_path(nc_file_path: str) -> str:
    """Give the directory containing ``nc_file_path``, always ending in "/"."""
    from pathlib import Path

    parent_dir = str(Path(nc_file_path).parent)
    # Drop any trailing slash first so exactly one is appended.
    return f"{parent_dir.rstrip('/')}/"
def extract_session_folder(nc_file_path: str) -> str | None:
    """
    Return the YYMMDD/YYYYMMDD folder path (with trailing slash) from a file path,
    or None if no date segment is found.

    The 8-digit pattern is tried before the 6-digit one. The result is the
    input path cut off at the end of the matched date segment (capture group 1).

    e.g. "bands/slug/231015/groove/take.wav" → "bands/slug/231015/"
         "bands/slug/take.wav"               → None
    """
    for pattern in (_YYYYMMDD_RE, _YYMMDD_RE):
        m = pattern.search(nc_file_path)
        if m:
            # Keep everything up to and including the date segment, then
            # normalise to exactly one trailing slash.
            return nc_file_path[: m.end(1)].rstrip("/") + "/"
    return None