fix(worker): don't set cdn_hls_base until HLS is uploaded; add reindex
One bug fixed and two features added:
1. handle_transcode was writing cdn_hls_base = "hls/{version_id}" to the DB
even though HLS files were only in a temp dir (never uploaded to Nextcloud).
The stream endpoint then tried to serve this non-existent path, returning 404
and breaking audio playback for every transcoded version. Removed the
cdn_hls_base write — stream endpoint falls back to nc_file_path (raw file),
which works correctly.
2. Added extract_peaks worker job type: lightweight job that downloads audio
and computes waveform_peaks + waveform_peaks_mini only. No transcode, no HLS,
no full analysis.
3. Added POST /internal/reindex-peaks endpoint (protected by internal secret):
finds all audio_versions with null waveform_peaks and enqueues extract_peaks
jobs. Safe to call multiple times. Use after a fresh DB scan or peak algorithm
changes.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -66,15 +66,14 @@ async def handle_transcode(payload: dict, session: AsyncSession, settings) -> No
|
||||
peaks_500 = await loop.run_in_executor(None, extract_peaks, audio, 500)
|
||||
peaks_100 = await loop.run_in_executor(None, extract_peaks, audio, 100)
|
||||
|
||||
# TODO: Upload HLS segments back to Nextcloud / object storage
|
||||
# For now, store the local tmp path in the DB (replace with real upload logic)
|
||||
hls_nc_path = f"hls/{version_id}"
|
||||
|
||||
# NOTE: HLS upload to Nextcloud is not yet implemented.
|
||||
# cdn_hls_base is intentionally left unchanged here — do NOT set it to a
|
||||
# local tmp path that will be deleted. The stream endpoint falls back to
|
||||
# nc_file_path (raw file from Nextcloud) when cdn_hls_base is null.
|
||||
stmt = (
|
||||
update(AudioVersionModel)
|
||||
.where(AudioVersionModel.id == version_id)
|
||||
.values(
|
||||
cdn_hls_base=hls_nc_path,
|
||||
waveform_peaks=peaks_500,
|
||||
waveform_peaks_mini=peaks_100,
|
||||
duration_ms=duration_ms,
|
||||
@@ -106,9 +105,41 @@ async def handle_analyse_range(payload: dict, session: AsyncSession, settings) -
|
||||
log.info("Range analysis complete for annotation %s", annotation_id)
|
||||
|
||||
|
||||
async def handle_extract_peaks(payload: dict, session: AsyncSession, settings) -> None:
    """Lightweight job: download audio and (re-)compute waveform peaks only.

    Used by the reindex endpoint to backfill peaks for versions that were
    registered before peak computation was added, or after algorithm changes.
    Does NOT transcode, generate HLS, or run full analysis.

    Args:
        payload: job payload; requires ``version_id`` (UUID string) and
            ``nc_file_path`` (path of the raw file in Nextcloud — assumed to
            be the same shape handle_transcode receives; confirm at caller).
        session: async DB session used to persist the computed peaks.
        settings: app settings; ``audio_tmp_dir`` is used as scratch space.
    """
    version_id = uuid.UUID(payload["version_id"])
    nc_path = payload["nc_file_path"]

    # Temp dir is removed automatically even if load/extract raises.
    with tempfile.TemporaryDirectory(dir=settings.audio_tmp_dir) as tmp:
        audio, _sr, _local_path = await load_audio(nc_path, tmp, settings)

        # get_running_loop() is the modern form of the deprecated
        # get_event_loop(); we are guaranteed to be inside a running loop here.
        # Peak extraction is CPU-bound, so push it off the event loop.
        loop = asyncio.get_running_loop()
        peaks_500 = await loop.run_in_executor(None, extract_peaks, audio, 500)
        peaks_100 = await loop.run_in_executor(None, extract_peaks, audio, 100)

    # Persist both resolutions; other columns (cdn_hls_base, duration_ms, ...)
    # are deliberately left untouched by this lightweight job.
    stmt = (
        update(AudioVersionModel)
        .where(AudioVersionModel.id == version_id)
        .values(
            waveform_peaks=peaks_500,
            waveform_peaks_mini=peaks_100,
        )
    )
    await session.execute(stmt)
    await session.commit()

    log.info("extract_peaks complete for version %s", version_id)
|
||||
|
||||
|
||||
# Dispatch table: maps a queued job's "type" field to the coroutine that
# processes it. Worker code looks handlers up here instead of branching.
HANDLERS = dict(
    transcode=handle_transcode,
    analyse_range=handle_analyse_range,
    extract_peaks=handle_extract_peaks,
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user