fix(import): decouple scan from HTTP connection, prevent duplicate imports
- Add scan_manager: background asyncio task + Redis event store so scans survive UI navigation; SSE stream reads from Redis and is reconnectable
- Replace SSE-tied scan endpoint with POST /nc-scan/start + GET /nc-scan/stream
- Fix frontend: AbortController + useEffect cleanup cancels stream on unmount without stopping the server-side scan
- Add unique constraint on audio_versions.nc_file_path (migration 0009) to prevent duplicate imports from concurrent scans; handle IntegrityError gracefully in nc_scan with rollback + skip
- Fix API health check: use plain Python instead of uv (not in dev image)
- Optimize Taskfile: fix duplicate dev:restart, add dev:fresh/dev:rebuild/dev:status, migrate uses run --rm, check includes typecheck

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
36
api/alembic/versions/0009_audio_version_nc_path_unique.py
Normal file
36
api/alembic/versions/0009_audio_version_nc_path_unique.py
Normal file
@@ -0,0 +1,36 @@
|
||||
"""Add unique constraint on audio_versions.nc_file_path.
|
||||
|
||||
Prevents duplicate imports when concurrent scans race on the same file.
|
||||
|
||||
Revision ID: 0009_av_nc_path_uq
|
||||
Revises: 0008_drop_nc_columns
|
||||
Create Date: 2026-04-12
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# Alembic revision identifiers, consumed by the migration runner.
revision = "0009_av_nc_path_uq"
down_revision = "0008_drop_nc_columns"
branch_labels = None
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Deduplicate audio_versions by nc_file_path, then enforce uniqueness."""
    # Remove any existing duplicates first (keep the oldest version per path)
    # NOTE(review): DISTINCT ON is PostgreSQL-specific syntax — this migration
    # assumes a Postgres backend; confirm no other dialect is targeted.
    op.execute("""
        DELETE FROM audio_versions
        WHERE id NOT IN (
            SELECT DISTINCT ON (nc_file_path) id
            FROM audio_versions
            ORDER BY nc_file_path, uploaded_at ASC
        )
    """)
    # With duplicates gone, the constraint can be created safely.
    op.create_unique_constraint(
        "uq_audio_version_nc_file_path",
        "audio_versions",
        ["nc_file_path"],
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the unique constraint so duplicate nc_file_path rows are allowed again."""
    op.drop_constraint(
        "uq_audio_version_nc_file_path",
        "audio_versions",
        type_="unique",
    )
|
||||
@@ -277,7 +277,7 @@ class AudioVersion(Base):
|
||||
)
|
||||
version_number: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
label: Mapped[str | None] = mapped_column(String(255))
|
||||
nc_file_path: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
nc_file_path: Mapped[str] = mapped_column(Text, nullable=False, unique=True)
|
||||
nc_file_etag: Mapped[str | None] = mapped_column(String(255))
|
||||
cdn_hls_base: Mapped[str | None] = mapped_column(Text)
|
||||
waveform_url: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
@@ -8,8 +9,7 @@ from pydantic import BaseModel
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from rehearsalhub.config import get_settings
|
||||
from rehearsalhub.db.engine import get_session, get_session_factory
|
||||
from rehearsalhub.queue.redis_queue import flush_pending_pushes
|
||||
from rehearsalhub.db.engine import get_session
|
||||
from rehearsalhub.db.models import Member
|
||||
from rehearsalhub.dependencies import get_current_member
|
||||
from rehearsalhub.repositories.band import BandRepository
|
||||
@@ -21,6 +21,7 @@ from rehearsalhub.schemas.comment import SongCommentCreate, SongCommentRead
|
||||
from rehearsalhub.schemas.song import SongCreate, SongRead, SongUpdate
|
||||
from rehearsalhub.services.band import BandService
|
||||
from rehearsalhub.services.nc_scan import scan_band_folder
|
||||
from rehearsalhub.services.scan_manager import get_events, is_scanning, start_scan
|
||||
from rehearsalhub.services.song import SongService
|
||||
from rehearsalhub.storage.factory import StorageFactory
|
||||
|
||||
@@ -175,45 +176,60 @@ async def _get_band_and_assert_member(
|
||||
return band
|
||||
|
||||
|
||||
@router.post("/bands/{band_id}/nc-scan/start", status_code=202)
async def scan_nextcloud_start(
    band_id: uuid.UUID,
    session: AsyncSession = Depends(get_session),
    current_member: Member = Depends(_member_from_request),
):
    """
    Start a background scan. Returns 202 immediately; progress is streamed via
    /nc-scan/stream. Returns 409 if a scan is already running for this band.
    """
    band = await _get_band_and_assert_member(band_id, current_member, session)
    bs = await BandStorageRepository(session).get_active_for_band(band_id)
    # Fall back to the conventional per-band folder when no storage root is set.
    band_folder = (bs.root_path if bs and bs.root_path else None) or f"bands/{band.slug}/"

    try:
        await start_scan(band_id, band_folder, current_member.id, get_settings())
    except LookupError as exc:
        # Chain the cause (B904) so the traceback shows the original failure.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc)
        ) from exc
    except RuntimeError as exc:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT, detail="Scan already in progress"
        ) from exc

    return {"status": "started"}
|
||||
|
||||
|
||||
@router.get("/bands/{band_id}/nc-scan/stream")
async def scan_nextcloud_stream(
    band_id: uuid.UUID,
    cursor: int = Query(default=0, ge=0),
    session: AsyncSession = Depends(get_session),
    current_member: Member = Depends(_member_from_request),
):
    """
    Stream scan events as newline-delimited JSON. Reads from Redis so this
    endpoint is independent of the scan's lifecycle — safe to reconnect after
    navigating away. Pass ?cursor=N to resume from event index N.
    """
    await _get_band_and_assert_member(band_id, current_member, session)

    async def event_generator():
        idx = cursor
        while True:
            events = await get_events(band_id, start=idx)
            for event in events:
                yield json.dumps(event) + "\n"
                idx += 1
                # Terminal events end the stream; the client reconnects with a
                # fresh cursor for the next scan.
                if event.get("type") in ("done", "error"):
                    return
            if not events:
                scanning = await is_scanning(band_id)
                if not scanning:
                    # Fix: the scan may have pushed its final events between the
                    # LRANGE above and this status check — drain once more so
                    # the terminal "done"/"error" event is not lost.
                    for event in await get_events(band_id, start=idx):
                        yield json.dumps(event) + "\n"
                        idx += 1
                    return
            await asyncio.sleep(0.3)

    return StreamingResponse(event_generator(), media_type="application/x-ndjson")
|
||||
|
||||
|
||||
@router.post("/bands/{band_id}/nc-scan", response_model=NcScanResult)
|
||||
|
||||
@@ -12,6 +12,7 @@ import logging
|
||||
from collections.abc import AsyncGenerator
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from rehearsalhub.repositories.audio_version import AudioVersionRepository
|
||||
@@ -179,6 +180,13 @@ async def scan_band_folder(
|
||||
)
|
||||
yield {"type": "song", "song": read.model_dump(mode="json"), "is_new": is_new}
|
||||
|
||||
except IntegrityError:
|
||||
# Unique constraint on nc_file_path — another concurrent scan already
|
||||
# imported this file. Roll back the savepoint and treat as skipped.
|
||||
await db_session.rollback()
|
||||
log.debug("scan: concurrent import collision on '%s', skipping", nc_file_path)
|
||||
skipped += 1
|
||||
yield {"type": "skipped", "path": nc_file_path, "reason": "already imported"}
|
||||
except Exception as exc:
|
||||
log.error("Failed to import '%s': %s", nc_file_path, exc, exc_info=True)
|
||||
skipped += 1
|
||||
|
||||
126
api/src/rehearsalhub/services/scan_manager.py
Normal file
126
api/src/rehearsalhub/services/scan_manager.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""Background scan manager.
|
||||
|
||||
Runs nc_scan.scan_band_folder as an asyncio task independent of any HTTP
|
||||
connection. Events are pushed to a Redis list so the SSE endpoint can read
|
||||
them whether or not the original requester is still connected.
|
||||
|
||||
Redis keys (all expire after EVENTS_TTL_SECONDS):
|
||||
scan:{band_id}:status — "running" | "done" | "failed" (string)
|
||||
scan:{band_id}:events — list of JSON-encoded event dicts (rpush / lrange)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
import redis.asyncio as aioredis
|
||||
|
||||
from rehearsalhub.config import get_settings
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Events remain readable for an hour after the scan finishes, then Redis expires them.
EVENTS_TTL_SECONDS = 3600  # events visible for 1 hour after scan completes

# NOTE(review): these templates are never used — _status_key()/_events_key()
# below build the keys with f-strings. One of the two forms should be removed
# to avoid the formats drifting apart.
_STATUS_KEY = "scan:{band_id}:status"
_EVENTS_KEY = "scan:{band_id}:events"

# In-process task registry — prevents duplicate scans within the same worker pod.
# Maps str(band_id) -> asyncio.Task; entries are removed in _run_scan's finally.
_running: dict[str, asyncio.Task] = {}
|
||||
|
||||
|
||||
def _status_key(band_id: uuid.UUID) -> str:
|
||||
return f"scan:{band_id}:status"
|
||||
|
||||
|
||||
def _events_key(band_id: uuid.UUID) -> str:
|
||||
return f"scan:{band_id}:events"
|
||||
|
||||
|
||||
async def _get_redis() -> aioredis.Redis:
    """Open a Redis client (decode_responses=True so values come back as str).

    NOTE(review): this creates a new client per call — acceptable at scan
    polling cadence, but a shared connection pool would avoid repeated
    connection setup; confirm before using on a hotter path.
    """
    return aioredis.from_url(get_settings().redis_url, decode_responses=True)
|
||||
|
||||
|
||||
async def is_scanning(band_id: uuid.UUID) -> bool:
    """Return True if a scan for *band_id* is currently marked "running".

    The status key expires EVENTS_TTL_SECONDS after it is last written, so a
    missing (expired or never-set) key reads as "not scanning".
    """
    r = await _get_redis()
    try:
        status = await r.get(_status_key(band_id))
    finally:
        # Fix: always release the connection — the original leaked it when
        # GET raised (e.g. Redis briefly unavailable).
        await r.aclose()
    return status == "running"
|
||||
|
||||
|
||||
async def get_events(band_id: uuid.UUID, start: int = 0) -> list[dict]:
    """Return events from index *start* onwards (0-based).

    Callers (the SSE stream endpoint) advance their cursor by one per returned
    event, so the result MUST stay positionally aligned with the Redis list.
    A corrupt entry is therefore replaced by a placeholder event rather than
    silently dropped — dropping would shift every later index, causing the
    reader to replay or skip events.
    """
    r = await _get_redis()
    try:
        raw = await r.lrange(_events_key(band_id), start, -1)
    finally:
        # Fix: release the connection even when LRANGE raises (original leaked).
        await r.aclose()

    events: list[dict] = []
    for item in raw:
        try:
            events.append(json.loads(item))
        except (TypeError, ValueError):
            # Keep index alignment; flag the slot as unreadable for debugging.
            events.append({"type": "corrupt", "raw": str(item)})
    return events
|
||||
|
||||
|
||||
async def start_scan(
    band_id: uuid.UUID,
    band_folder: str,
    member_id: uuid.UUID,
    settings,
) -> None:
    """Launch a background scan task. Raises RuntimeError if already running.

    NOTE(review): the duplicate-scan guard is the in-process ``_running``
    registry only, so two separate worker processes could still race; the
    unique constraint on nc_file_path is the backstop for that case. Confirm
    single-worker deployment or add a Redis SETNX-style lock if cross-process
    exclusion is required.
    """
    key = str(band_id)
    task = _running.get(key)
    if task and not task.done():
        raise RuntimeError("Scan already in progress")

    # Reset state from any previous scan before marking this one as running.
    r = await _get_redis()
    try:
        await r.delete(_events_key(band_id))
        await r.set(_status_key(band_id), "running", ex=EVENTS_TTL_SECONDS)
    finally:
        # Fix: always close — the original leaked the connection if DELETE/SET
        # raised, and a half-initialized state is preferable to a leaked socket.
        await r.aclose()

    task = asyncio.create_task(_run_scan(band_id, band_folder, member_id, settings))
    _running[key] = task
|
||||
|
||||
|
||||
async def _run_scan(
    band_id: uuid.UUID,
    band_folder: str,
    member_id: uuid.UUID,
    settings,
) -> None:
    """Drive one scan to completion, mirroring every event into Redis.

    Runs detached from any HTTP request: progress events are RPUSHed onto the
    band's event list and the status key is left as "done" or "failed" so the
    stream endpoint can replay events and terminate correctly.
    """
    # Imported lazily to avoid import cycles at module load time
    # (these services import from modules that import scan_manager).
    from rehearsalhub.db.engine import get_session_factory
    from rehearsalhub.queue.redis_queue import flush_pending_pushes
    from rehearsalhub.services.nc_scan import scan_band_folder
    from rehearsalhub.storage.factory import StorageFactory

    r = await _get_redis()
    events_key = _events_key(band_id)
    status_key = _status_key(band_id)

    async def push(event: dict) -> None:
        # Append the event and refresh the TTL so the list outlives the scan
        # by EVENTS_TTL_SECONDS for late/reconnecting readers.
        await r.rpush(events_key, json.dumps(event))
        await r.expire(events_key, EVENTS_TTL_SECONDS)

    try:
        async with get_session_factory()() as db:
            storage = await StorageFactory.create(db, band_id, settings)
            async for event in scan_band_folder(db, storage, band_id, band_folder, member_id):
                await push(event)
                # Commit per imported item so partial progress survives a crash
                # and queued push notifications go out as each item lands.
                if event.get("type") in ("song", "session"):
                    await db.commit()
                    await flush_pending_pushes(db)
            # Final commit/flush for any trailing work after the generator ends.
            await db.commit()
            await flush_pending_pushes(db)

        await r.set(status_key, "done", ex=EVENTS_TTL_SECONDS)
        log.info("Background scan completed for band %s", band_id)

    except Exception as exc:
        log.exception("Background scan failed for band %s", band_id)
        # NOTE(review): if Redis itself is down, this push re-raises and the
        # status key stays "running" until its TTL expires — readers would
        # poll until then. Confirm this is acceptable.
        await push({"type": "error", "message": "Scan failed due to an internal error."})
        await r.set(status_key, "failed", ex=EVENTS_TTL_SECONDS)
    finally:
        await r.aclose()
        # Drop the in-process registry entry so a new scan can start.
        _running.pop(str(band_id), None)
|
||||
Reference in New Issue
Block a user