fix: resolve job-not-found race and YYMMDD scan folder structure

Race condition (worker "Job not found in DB"):
- RedisJobQueue.enqueue() was pushing job IDs to Redis immediately after
  flush() but before the API transaction committed, so the worker could
  pop an ID whose row was not yet visible from its own session.
- Fix: defer the Redis rpush until after session.commit() via a pending-
  push list drained by get_session() after each successful commit.
- Worker: drain stale Redis queue entries on startup to clear any IDs
  left over from previously uncommitted transactions.
- Worker: add 3-attempt retry with 200ms sleep when a job is not found,
  as a safety net for any remaining propagation edge cases.

NC scan folder structure (YYMMDD rehearsal subfolders):
- Previously used dir_name as song title for all files in a subdirectory,
  meaning every file got the folder name (e.g. "231015") as its title.
- Fix: derive song title from Path(sub_rel).stem so each audio file gets
  its own name; use the file's parent path as nc_folder for version grouping.
- Rehearsal folder name stored in song.notes as "Rehearsal: YYMMDD".
- Added structured logging throughout the scan: entries found, per-folder
  file counts, skip/create/import decisions, and final summary count.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Author: Steffen Schuhmann
Date:   2026-03-28 21:58:46 +01:00
commit b28472c32f
parent f7be1b994d
4 changed files with 103 additions and 40 deletions


@@ -3,11 +3,13 @@
 Strategy: Postgres is the source of truth (durable audit log + retry counts).
 Redis holds a list of job UUIDs for fast signaling. Workers pop a UUID, load
 the full payload from Postgres, process, then update status in Postgres.
+The Redis push is deferred until AFTER the session commits so the worker
+never reads a job ID that isn't yet visible in the DB.
 """
 from __future__ import annotations
 
 import json
 import uuid
 from datetime import datetime, timezone
 from typing import Any
 
@@ -18,6 +20,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from rehearsalhub.config import get_settings
 from rehearsalhub.db.models import Job
 
+_PENDING_ATTR = "_pending_redis_pushes"
+
 
 class RedisJobQueue:
     def __init__(self, session: AsyncSession, redis_client: aioredis.Redis | None = None) -> None:
@@ -34,24 +38,23 @@ class RedisJobQueue:
         self._session.add(job)
         await self._session.flush()
         await self._session.refresh(job)
-        r = await self._get_redis()
+        job_id = job.id
+        redis_client = await self._get_redis()
         queue_key = get_settings().job_queue_key
-        await r.rpush(queue_key, str(job.id))
-        return job.id
+
+        # Defer the Redis push until get_session commits, so the worker never
+        # reads a job ID that isn't yet visible in the DB.
+        pending: list = getattr(self._session, _PENDING_ATTR, None)  # type: ignore[attr-defined]
+        if pending is None:
+            pending = []
+            setattr(self._session, _PENDING_ATTR, pending)  # type: ignore[attr-defined]
+
+        async def _push() -> None:
+            await redis_client.rpush(queue_key, str(job_id))
+
+        pending.append(_push)
+        return job_id
 
     async def dequeue(self, timeout: int = 5) -> tuple[uuid.UUID, str, dict[str, Any]] | None:
         r = await self._get_redis()
         queue_key = get_settings().job_queue_key
         result = await r.blpop(queue_key, timeout=timeout)
         if result is None:
             return None
         _, raw_id = result
         job_id = uuid.UUID(raw_id)
         job = await self._session.get(Job, job_id)
         if job is None:
             return None
         return job.id, job.type, job.payload
 
     async def mark_running(self, job_id: uuid.UUID) -> None:
         job = await self._session.get(Job, job_id)
@@ -79,3 +82,13 @@
     async def close(self) -> None:
         if self._redis:
             await self._redis.aclose()
+
+
+async def flush_pending_pushes(session: AsyncSession) -> None:
+    """Called by get_session after commit() to fire deferred Redis pushes."""
+    pending: list | None = getattr(session, _PENDING_ATTR, None)  # type: ignore[attr-defined]
+    if not pending:
+        return
+    for push in pending:
+        await push()
+    pending.clear()