"""Core nc-scan logic shared by the blocking and streaming endpoints.""" from __future__ import annotations import logging from collections.abc import AsyncGenerator from pathlib import Path from urllib.parse import unquote from sqlalchemy.ext.asyncio import AsyncSession from rehearsalhub.repositories.audio_version import AudioVersionRepository from rehearsalhub.repositories.rehearsal_session import RehearsalSessionRepository from rehearsalhub.repositories.song import SongRepository from rehearsalhub.schemas.audio_version import AudioVersionCreate from rehearsalhub.schemas.song import SongRead from rehearsalhub.services.session import extract_session_folder, parse_rehearsal_date from rehearsalhub.services.song import SongService from rehearsalhub.storage.nextcloud import NextcloudClient log = logging.getLogger(__name__) AUDIO_EXTENSIONS = {".mp3", ".wav", ".flac", ".ogg", ".m4a", ".aac", ".opus"} # Maximum folder depth to recurse into below the band root. # Depth 0 = band root, 1 = YYMMDD folder, 2 = song subfolder, 3 = safety margin. MAX_SCAN_DEPTH = 3 def _make_relative(dav_prefix: str): """Return a function that strips the WebDAV prefix and URL-decodes a href.""" def relative(href: str) -> str: decoded = unquote(href) if decoded.startswith(dav_prefix): return decoded[len(dav_prefix):] # Strip any leading slash for robustness return decoded.lstrip("/") return relative async def collect_audio_files( nc: NextcloudClient, relative: object, # Callable[[str], str] folder_path: str, max_depth: int = MAX_SCAN_DEPTH, _depth: int = 0, ) -> AsyncGenerator[str, None]: """ Recursively yield user-relative audio file paths under folder_path. Handles any depth: bands/slug/take.wav depth 0 bands/slug/231015/take.wav depth 1 bands/slug/231015/groove/take.wav depth 2 ← was broken before """ if _depth > max_depth: log.debug("Max depth %d exceeded at '%s', stopping recursion", max_depth, folder_path) return try: items = await nc.list_folder(folder_path) except Exception as exc: log.warning("Could not list folder '%s': %s", folder_path, exc) return log.info( "scan depth=%d folder='%s' entries=%d", _depth, folder_path, len(items), ) for item in items: rel = relative(item.path) # type: ignore[operator] if rel.endswith("/"): # It's a subdirectory — recurse log.info(" → subdir: %s", rel) async for subpath in collect_audio_files(nc, relative, rel, max_depth, _depth + 1): yield subpath else: ext = Path(rel).suffix.lower() if ext in AUDIO_EXTENSIONS: log.info(" → audio file: %s", rel) yield rel elif ext: log.debug(" → skip (ext=%s): %s", ext, rel) async def scan_band_folder( db_session: AsyncSession, nc: NextcloudClient, band_id, band_folder: str, member_id, ) -> AsyncGenerator[dict, None]: """ Async generator that scans band_folder and yields event dicts: {"type": "progress", "message": str} {"type": "song", "song": SongRead-dict, "is_new": bool} {"type": "session", "session": {id, date, label}} {"type": "skipped", "path": str, "reason": str} {"type": "done", "stats": {found, imported, skipped}} {"type": "error", "message": str} """ dav_prefix = f"/remote.php/dav/files/{nc._auth[0]}/" relative = _make_relative(dav_prefix) version_repo = AudioVersionRepository(db_session) session_repo = RehearsalSessionRepository(db_session) song_repo = SongRepository(db_session) song_svc = SongService(db_session) found = 0 imported = 0 skipped = 0 yield {"type": "progress", "message": f"Scanning {band_folder}…"} async for nc_file_path in collect_audio_files(nc, relative, band_folder): found += 1 song_folder = str(Path(nc_file_path).parent).rstrip("/") + "/" song_title = Path(nc_file_path).stem # If the file sits directly inside a dated session folder (YYMMDD/file.wav), # give it a unique virtual folder so each file becomes its own song rather # than being merged as a new version of the first file in that folder. session_folder_path = extract_session_folder(nc_file_path) if session_folder_path and session_folder_path.rstrip("/") == song_folder.rstrip("/"): song_folder = song_folder + song_title + "/" yield {"type": "progress", "message": f"Checking {Path(nc_file_path).name}…"} # Fetch file metadata (etag + size) — one PROPFIND per file try: meta = await nc.get_file_metadata(nc_file_path) etag = meta.etag except Exception as exc: log.warning("Metadata error for '%s': %s", nc_file_path, exc) yield {"type": "skipped", "path": nc_file_path, "reason": f"metadata error: {exc}"} continue # Skip if this exact version is already indexed if etag and await version_repo.get_by_etag(etag): log.info("Already registered (etag match): %s", nc_file_path) skipped += 1 yield {"type": "skipped", "path": nc_file_path, "reason": "already registered"} continue # Resolve or create a RehearsalSession from a YYMMDD folder segment rehearsal_date = parse_rehearsal_date(nc_file_path) rehearsal_session_id = None if rehearsal_date: session_folder = extract_session_folder(nc_file_path) or song_folder rs = await session_repo.get_or_create(band_id, rehearsal_date, session_folder) rehearsal_session_id = rs.id yield { "type": "session", "session": { "id": str(rs.id), "date": rs.date.isoformat(), "label": rs.label, "nc_folder_path": rs.nc_folder_path, }, } # Find or create the Song record song = await song_repo.get_by_nc_folder_path(song_folder) if song is None: song = await song_repo.get_by_title_and_band(band_id, song_title) is_new = song is None if is_new: log.info("Creating song '%s' folder='%s'", song_title, song_folder) song = await song_repo.create( band_id=band_id, session_id=rehearsal_session_id, title=song_title, status="jam", notes=None, nc_folder_path=song_folder, created_by=member_id, ) elif rehearsal_session_id and song.session_id is None: song = await song_repo.update(song, session_id=rehearsal_session_id) # Register the audio version await song_svc.register_version( song.id, AudioVersionCreate( nc_file_path=nc_file_path, nc_file_etag=etag, format=Path(nc_file_path).suffix.lstrip(".").lower(), file_size_bytes=meta.size, ), member_id, ) imported += 1 read = SongRead.model_validate(song).model_copy(update={"version_count": 1, "session_id": rehearsal_session_id}) yield {"type": "song", "song": read.model_dump(mode="json"), "is_new": is_new} yield { "type": "done", "stats": {"found": found, "imported": imported, "skipped": skipped}, }