# media-stack.yaml — master spec (taxonomy + ops + libs + routing + tools)
# Spec version. Quoted so loaders keep it as the string "0.1" instead of
# inferring a float (where e.g. 0.10 would silently collapse to 0.1).
version: "0.1"
# Reference vocabulary for the spec, grouped into numbered buckets.
# tool_template comments point at buckets by number (e.g. "taxonomy.9").
# NOTE(review): nesting was reconstructed from a flattened source in which
# `video:`/`audio:` appeared as duplicate top-level keys; confirm that the
# numbered buckets are meant to live under `media_buckets`.
taxonomy:
  media_buckets:
    1_media_types: [video, audio, images, subtitles, data]
    2_codecs:
      video: [h264, hevc, av1, vp9, prores, theora]
      audio: [aac, opus, mp3, flac, vorbis, pcm_s16le, alac]
    3_encoders_decoders:
      encoders: [libx264, libx265, libaom-av1, libvpx-vp9, h264_nvenc, hevc_qsv, libopus, libmp3lame, aac]
      decoders: [h264, hevc, av1, vp9, aac, opus, mp3]
    # Wrapped only to keep the line length reasonable; still one flow list.
    4_containers:
      [mp4, mkv, webm, mov, avi, ts, mp3, m4a, wav, flac, ogg,
       jpg, png, webp, avif, heic, tiff, pdf, svg, ico]
    5_file_extensions_types:
      extensions_examples: [.mp4, .mkv, .webm, .mp3, .jpg, .png, .pdf, .svg]
      mime_examples: [video/mp4, audio/mpeg, image/png, application/pdf]
    6_stream_properties:
      video: [resolution, fps, aspect_ratio, color_space, bitrate]
      audio: [sample_rate, channels, bitrate]
    7_processing_operations_fine: [mux, demux, remux, transcode, filter]
    8_delivery_modes: [progressive, streaming]  # streaming: HLS/DASH/RTMP/RTSP
    9_protections: [none, drm, hdcp, geoblock, signed_url, watermark]
    10_protocols: [file, http, https, ftp, rtmp, rtsp, hls, dash, concat, cache, bluray, crypto, data, device]
    11_ffmpeg_libraries: [libavcodec, libavformat, libavfilter, libswscale, libswresample]
    12_extractors_detectors: [yt-dlp_extractors, format_sniffers]
    13_manifests_playlists: [.m3u8, .mpd, .pls, .asx]
    14_descriptors_identifiers: [ID, EXT, PROTO, TBR, FPS, VCODEC, ACODEC, ABR, ASR, language, quality_label]
    15_filters_effects:
      video: [scale, crop, fps, overlay, subtitles, drawtext]
      audio: [volume, resample, loudnorm, highpass, lowpass, afftdn]
    16_metadata_tags: [id3, vorbis_comments, mp4_atoms, chapters, cover_art, language_tags]
    17_devices_inputs: [avfoundation, dshow, alsa, pulse, x11grab, webcams, microphones]
# Keyword-based verbs; a tool's `operation` field takes one of these values.
operations_coarse:
  - convert
  - compress
  - upscale
  - shrink
  - combine
  - split
  - download
  - record
  - extract
  - edit
  - analyze
  - package
# Values accepted by a tool's `engine` field and by routing_rules `then.engine`.
engines:
  # browser/local (e.g., libvips via WASM)
  - wasm
  # video/audio via ffmpeg daemon (VERT)
  - vertd
  # outside stacks: LibreOffice, Poppler, etc.
  - external
  - unsupported
  - unknown
# Enum for a tool's delivery.input / delivery.output.
delivery_modes:
  - progressive
  - streaming

# Enum for a tool's `protocol` list.
# NOTE(review): taxonomy bucket 10_protocols also lists bluray and crypto,
# which are absent here; confirm the omission is intentional.
protocols:
  - file
  - http
  - https
  - ftp
  - rtmp
  - rtsp
  - hls
  - dash
  - concat
  - cache
  - data
  - device

# Enum for a tool's media.input / media.output.
media_types:
  - video
  - audio
  - image_raster
  - image_vector
  - document
  - subtitle
  - archive
  - playlist
# Catalog of libraries/tools grouped by domain.
# Every group is nested under this key; left flat, the `tools` list here
# collides with the top-level `tools` section of tool entries.
libraries_catalog:
  # top-level tools
  tools: [yt-dlp, ffmpeg, ffprobe, VERT, vertd, streamlink, imagemagick, libvips, exiftool, sox]
  # image stacks
  image:
    core: [libvips, imagemagick]
    codecs: [libjpeg-turbo, libpng, libwebp, libavif, libjxl, libtiff, libheif]
    optimize: [pngquant, oxipng, mozjpeg, gifsicle, svgo]
    vector_raster: [librsvg, resvg, cairosvg, potrace, icoutils]
  # audio/video
  av:
    ffmpeg: [libx264, libx265, libaom-av1, libvpx-vp9, libopus, libmp3lame, aac]
    dsp: [sox, rubberband, libsndfile]
  # docs/ebooks/html
  docs:
    pdf: [poppler, ghostscript, qpdf, pdfcpu]
    office: [libreoffice, unoconv]
    convert_text: [pandoc]
    html_pdf: [wkhtmltopdf, headless_chromium, weasyprint]
    ebooks: [calibre]
  # archives/fonts
  misc:
    archives: [libarchive, p7zip, unrar]
    fonts: [fonttools]
# Compact decision hints for the auto-matcher: each rule pairs an `if`
# condition string with a `then` mapping of fields to apply; the final
# `defaults` entry gives the fallback engine.
# NOTE(review): condition strings are kept verbatim. They mix `or`/`OR` and
# use bare `output`/`animated`/`protocol` next to dotted `media.input`;
# confirm the matcher's grammar accepts all of these spellings.
# NOTE(review): if the matcher is first-match-wins, the DRM rule sits after
# rules that would already route DRM-protected video/streams to vertd —
# confirm the evaluation order.
routing_rules:
  - if: "media.input == video or output == video or animated == true"
    then:
      engine: vertd
      libraries_add: [ffmpeg]
  - if: "media.input == image_raster and formats.in not in [heic, avif, jxl] and not animated"
    then:
      engine: wasm
      libraries_add: [libvips]
  - if: "formats.in any of [heic, avif, jxl] OR requires_advanced_filters == true"
    then:
      engine: vertd
      libraries_add: [ffmpeg]
  - if: "media.input in [image_vector, document, archive, playlist]"
    then:
      engine: external
  - if: "delivery.input == streaming or protocol any of [hls, dash, rtmp, rtsp]"
    then:
      engine: vertd
      libraries_add: [ffmpeg]
  - if: "protection in [drm]"
    then:
      engine: unsupported
  - defaults:
      engine: unknown
# Blank tool record. Entries in `tools` pull it in with `<<: *tool_template`
# and override the fields they need.
# The YAML merge key is shallow: an entry that sets `media`, `formats`,
# `codec_in`, `codec_out` or `delivery` replaces that whole nested mapping.
# NOTE(review): nesting was reconstructed from a flattened source;
# container_* and codec_* are placed beside `formats` (not inside it) because
# entries elsewhere write `formats` as `{ in: [...], out: [...] }` — confirm.
tool_template: &tool_template
  id: ""  # slug, e.g., "mp4-to-mp3"
  name: ""  # human title
  operation: ""  # from operations_coarse
  engine: unknown
  libraries: []  # ["yt-dlp", "ffmpeg", "libvips", ...]
  media:
    input: ""  # media_types enum
    output: ""  # media_types enum
  formats:
    in: []  # ["mp4"]
    out: []  # ["mp3"]
  container_in: ""  # optional
  container_out: ""  # optional
  codec_in:  # optional
    video: []
    audio: []
  codec_out:  # optional
    video: []
    audio: []
  delivery:
    input: progressive
    output: progressive
  protection: none  # taxonomy.9
  protocol: [file]  # taxonomy.10
  params: {}  # e.g., { crf: 23, abr: "192k" }
  sample_cmd: ""  # ffmpeg / external example
  notes: ""
  tags: []  # e.g., ["quick","lossless"]
# A few filled-in example entries. Each one merges `*tool_template` and then
# overrides fields; the merge is shallow, so a nested mapping given here
# (media, formats, codec_*, delivery) replaces the template's mapping whole.
# Commands containing quotes, `%`, `<` or `>` are single-quoted so they stay
# plain strings under any loader.
tools:
  - <<: *tool_template
    id: mp4-to-mp3
    name: MP4 to MP3
    operation: convert
    engine: vertd
    libraries: [yt-dlp, ffmpeg, libmp3lame]
    media: { input: video, output: audio }
    formats:
      in: [mp4]
      out: [mp3]
    container_in: mp4
    container_out: mp3
    codec_in: { video: [h264], audio: [aac] }
    codec_out: { audio: [mp3] }
    protocol: [file, https]
    sample_cmd: ffmpeg -i in.mp4 -vn -c:a libmp3lame -q:a 2 out.mp3
    notes: extract + transcode audio

  - <<: *tool_template
    id: webp-to-png
    name: WEBP to PNG
    operation: convert
    engine: wasm
    libraries: [libvips]
    media: { input: image_raster, output: image_raster }
    formats: { in: [webp], out: [png] }
    sample_cmd: ""  # handled in-browser via libvips

  - <<: *tool_template
    id: heic-to-jpg
    name: HEIC to JPG
    operation: convert
    engine: vertd
    libraries: [ffmpeg, libheif, libjpeg-turbo]
    media: { input: image_raster, output: image_raster }
    formats: { in: [heic], out: [jpg] }
    sample_cmd: ffmpeg -i in.heic out.jpg
    notes: wasm builds often lack HEIC decode

  - <<: *tool_template
    id: mp4-to-gif
    name: MP4 to GIF
    operation: convert
    engine: vertd
    libraries: [ffmpeg]
    media: { input: video, output: image_raster }
    formats: { in: [mp4], out: [gif] }
    # Folded into one line; `>-` keeps a trailing newline out of the command.
    sample_cmd: >-
      ffmpeg -i in.mp4 -vf "fps=12,scale=640:-1:flags=lanczos,split[s0][s1];
      [s0]palettegen[p];[s1][p]paletteuse" out.gif

  - <<: *tool_template
    id: pdf-to-png
    name: PDF to PNG
    operation: convert
    engine: external
    libraries: [poppler]
    media: { input: document, output: image_raster }
    formats: { in: [pdf], out: [png] }
    sample_cmd: pdftoppm -png -r 200 in.pdf out

  - <<: *tool_template
    id: merge-pdfs
    name: Combine PDFs
    operation: combine
    engine: external
    libraries: [qpdf]
    media: { input: document, output: document }
    formats: { in: [pdf], out: [pdf] }
    sample_cmd: qpdf --empty --pages a.pdf b.pdf c.pdf -- out.pdf

  - <<: *tool_template
    id: m3u8-download
    name: Download HLS (.m3u8)
    operation: download
    engine: vertd
    libraries: [yt-dlp, ffmpeg]
    media: { input: playlist, output: video }
    formats: { in: [m3u8], out: [mp4] }
    delivery: { input: streaming, output: progressive }
    protocol: [https, hls]
    sample_cmd: 'yt-dlp -f "bv*+ba/b" -o "%(title)s.%(ext)s" "<m3u8_or_page_url>"'

  - <<: *tool_template
    id: png-compress
    name: Compress PNG
    operation: compress
    engine: wasm
    libraries: [libvips, pngquant]
    media: { input: image_raster, output: image_raster }
    formats: { in: [png], out: [png] }
    params: { quality: "60-80" }
    notes: lossy quantization for size wins

  - <<: *tool_template
    id: svg-to-png
    name: SVG to PNG
    operation: convert
    engine: external
    libraries: [librsvg]
    media: { input: image_vector, output: image_raster }
    formats: { in: [svg], out: [png] }
    # Output file is out.png; a stray trailing "-" had been fused onto it.
    sample_cmd: 'rsvg-convert -w 1024 -h 1024 in.svg > out.png'
- start by appending your 1k tools under `tools:` using the `*tool_template`.
- keep the coarse operations you like (convert/compress/…).
- let your matcher script read this YAML, apply `routing_rules`, and:
  - fill `engine`/`libraries` if missing,
  - emit `tools.json` (for code) and `tools.csv` (for spreadsheets),
  - warn on `engine: unknown` or `protection: drm`.
Here’s the same “relevant libraries” list as a **spreadsheet-style table** so you can scan/sort quickly.
---
| Domain | Library / Tool | Purpose / Capabilities | License / Notes |
| ---------------------------- | ------------------------------ | -------------------------------------------- | ----------------------------- |
| **Containers / Packaging** | Bento4 | MP4/HLS/DASH muxing, CMAF, transmux | Permissive |
| | GPAC / MP4Box | MP4 mux/fragment, DASH/HLS packager | LGPL |
| | Shaka Packager | DASH/HLS packaging, clear-key, DRM workflows | BSD (DRM licensed separately) |
| | MKVToolNix (mkvmerge, mkvinfo) | Matroska mux/split/inspect | GPL |
| **Video Codecs** | x264 / x265 | H.264 / H.265 encoders | GPL |
| | SVT-AV1 | Fast AV1 encoder (Intel) | BSD |
| | libaom-av1 | Reference AV1 encoder | BSD |
| | rav1e | Rust AV1 encoder | Apache |
| | libvpx-vp9 | VP9 codec (Google) | BSD |
| | dav1d | High-performance AV1 decoder | BSD |
| **Audio Codecs** | libopus | Opus codec | BSD |
| | libvorbis | Vorbis codec | BSD |
| | libflac | FLAC codec | BSD |
| | libmp3lame | MP3 encoder | LGPL |
| | fdk-aac | High quality AAC encoder | License encumbered |
| **Hardware Accel** | NVIDIA NVENC/NVDEC | HW encode/decode | Proprietary driver |
| | Intel oneVPL/QSV | HW encode/decode | Permissive |
| | VAAPI | Linux HW accel | MIT |
| | Apple VideoToolbox | macOS HW accel | Proprietary |
| **Filters / Quality** | VapourSynth | Scriptable video filter graphs | MIT |
| | zimg | Colorspace, scaling, HDR→SDR | zlib |
| | libplacebo | GPU shaders, tonemapping | MIT |
| | libvmaf | Netflix VMAF quality metrics | BSD |
| | RNNoise | Neural noise suppression (audio) | BSD |
| **Thumbnails / Waveforms** | ffmpegthumbnailer | Fast video thumbnails | GPL |
| | audiowaveform | Waveform JSON/PNGs | GPL |
| **Subtitles / Captions** | libass | ASS/SSA render (burn-in) | ISC |
| | ccextractor | Extract CC to SRT | GPL |
| | ffsubsync | Auto-sync subs to media | MIT |
| **Images (Raster)** | libheif | HEIC/HEIF codec | LGPL |
| | libavif | AVIF codec | BSD |
| | libjxl | JPEG XL codec | BSD |
| | mozjpeg | Optimized JPEG encoder | BSD |
| | pngquant / oxipng | PNG compress/optimize | GPL/MIT |
| | gifsicle | GIF compress/optimize | GPL |
| **Vector / Icons** | librsvg / resvg | SVG→PNG rasterization | LGPL/Apache |
| | SVGO | SVG optimizer (Node.js) | MIT |
| | potrace | Raster→vector tracing | GPL |
| | icoutils | ICO/ICNS packaging | GPL |
| **Documents / Ebooks / OCR** | Poppler | PDF→images/text | GPL |
| | Ghostscript | PDF/PostScript processing | AGPL |
| | qpdf | PDF merge/split/linearize | Apache |
| | pdfcpu | PDF toolkit (Go) | Apache |
| | LibreOffice / unoconv | Docx/pptx/xlsx↔pdf | MPL/LGPL |
| | Pandoc | Universal doc converter | GPL |
| | Calibre | Ebook conversions | GPL |
| | Tesseract OCR | OCR engine | Apache |
| | OCRmyPDF | Add searchable text layer | MIT |
| **Download / Ingest** | aria2 | Multi-source downloader | GPL |
| | curl / wget | Fetch with cookies/headers | MIT/GPL |
| | N\_m3u8DL-RE | Robust HLS/DASH downloader | GPL |
| | MediaInfo | Media metadata probe | BSD |
| | rclone | Cloud storage sync | MIT |
| **Live / IO / Servers** | MediaMTX (rtsp-simple-server) | RTSP/RTMP/WebRTC server | Apache |
| | NGINX RTMP module | RTMP ingest | BSD |
| | SRT / RIST | Reliable UDP transport | MPL/BSD |
| **Speech / Transcription** | whisper.cpp / faster-whisper | Local transcription (Whisper) | MIT |
| | Vosk | Offline ASR | Apache |
| **Bindings / SDKs** | sharp (Node) | libvips binding | LGPL |
| | fluent-ffmpeg (Node) | FFmpeg wrapper | MIT |
| | moviepy / ffmpeg-python | Python video/audio processing | MIT |
| | pydub | Python audio wrapper | MIT |
| | PyMuPDF / pdfminer.six | Python PDF | GPL |
| | pillow | Python imaging | PIL fork |
| | gstreamer-rs, rav1e (Rust) | Media in Rust | BSD/Apache |
| | pdfcpu (Go), go-mediainfo | Go toolkits | Apache |
---
Domain,Library / Tool,Purpose / Capabilities,License / Notes
Containers / Packaging,Bento4,"MP4/HLS/DASH muxing, CMAF, transmux",Permissive
Containers / Packaging,GPAC / MP4Box,"MP4 mux/fragment, DASH/HLS packager",LGPL
Containers / Packaging,Shaka Packager,"DASH/HLS packaging, clear-key, DRM workflows",BSD (DRM separate)
Containers / Packaging,MKVToolNix,Matroska mux/split/inspect,GPL
Video Codecs,x264 / x265,H.264 / H.265 encoders,GPL
Video Codecs,SVT-AV1,Fast AV1 encoder (Intel),BSD
Video Codecs,libaom-av1,Reference AV1 encoder,BSD
Video Codecs,rav1e,Rust AV1 encoder,Apache
Video Codecs,libvpx-vp9,VP9 codec (Google),BSD
Video Codecs,dav1d,High-performance AV1 decoder,BSD
Audio Codecs,libopus,Opus codec,BSD
Audio Codecs,libvorbis,Vorbis codec,BSD
Audio Codecs,libflac,FLAC codec,BSD
Audio Codecs,libmp3lame,MP3 encoder,LGPL
Audio Codecs,fdk-aac,High quality AAC encoder,Encumbered
Hardware Accel,NVIDIA NVENC/NVDEC,HW encode/decode,Proprietary
Hardware Accel,Intel oneVPL/QSV,HW encode/decode,Permissive
Hardware Accel,VAAPI,Linux HW accel,MIT
Hardware Accel,Apple VideoToolbox,macOS HW accel,Proprietary
Filters / Quality,VapourSynth,Scriptable video filter graphs,MIT
Filters / Quality,zimg,"Colorspace, scaling, HDR→SDR",zlib
Filters / Quality,libplacebo,"GPU shaders, tonemapping",MIT
Filters / Quality,libvmaf,Netflix VMAF quality metrics,BSD
Filters / Quality,RNNoise,Neural noise suppression (audio),BSD
Thumbnails / Waveforms,ffmpegthumbnailer,Fast video thumbnails,GPL
Thumbnails / Waveforms,audiowaveform,Waveform JSON/PNGs,GPL
Subtitles / Captions,libass,ASS/SSA render (burn-in),ISC
Subtitles / Captions,ccextractor,Extract CC to SRT,GPL
Subtitles / Captions,ffsubsync,Auto-sync subs to media,MIT
Images (Raster),libheif,HEIC/HEIF codec,LGPL
Images (Raster),libavif,AVIF codec,BSD
Images (Raster),libjxl,JPEG XL codec,BSD
Images (Raster),mozjpeg,Optimized JPEG encoder,BSD
Images (Raster),pngquant / oxipng,PNG compress/optimize,GPL/MIT
Images (Raster),gifsicle,GIF compress/optimize,GPL
Vector / Icons,librsvg / resvg,SVG→PNG rasterization,LGPL/Apache
Vector / Icons,SVGO,SVG optimizer (Node.js),MIT
Vector / Icons,potrace,Raster→vector tracing,GPL
Vector / Icons,icoutils,ICO/ICNS packaging,GPL
Documents / Ebooks / OCR,Poppler,PDF→images/text,GPL
Documents / Ebooks / OCR,Ghostscript,PDF/PostScript processing,AGPL
Documents / Ebooks / OCR,qpdf,PDF merge/split/linearize,Apache
Documents / Ebooks / OCR,pdfcpu,PDF toolkit (Go),Apache
Documents / Ebooks / OCR,LibreOffice / unoconv,Docx/pptx/xlsx↔pdf,MPL/LGPL
Documents / Ebooks / OCR,Pandoc,Universal doc converter,GPL
Documents / Ebooks / OCR,Calibre,Ebook conversions,GPL
Documents / Ebooks / OCR,Tesseract OCR,OCR engine,Apache
Documents / Ebooks / OCR,OCRmyPDF,Add searchable text layer,MIT
Download / Ingest,aria2,Multi-source downloader,GPL
Download / Ingest,curl / wget,Fetch with cookies/headers,MIT/GPL
Download / Ingest,N_m3u8DL-RE,Robust HLS/DASH downloader,GPL
Download / Ingest,MediaInfo,Media metadata probe,BSD
Download / Ingest,rclone,Cloud storage sync,MIT
Live / IO / Servers,MediaMTX,RTSP/RTMP/WebRTC server,Apache
Live / IO / Servers,NGINX RTMP module,RTMP ingest,BSD
Live / IO / Servers,SRT / RIST,Reliable UDP transport,MPL/BSD
Speech / Transcription,whisper.cpp / faster-whisper,Local transcription (Whisper),MIT
Speech / Transcription,Vosk,Offline ASR,Apache
Bindings / SDKs,sharp (Node),libvips binding,LGPL
Bindings / SDKs,fluent-ffmpeg (Node),FFmpeg wrapper,MIT
Bindings / SDKs,moviepy / ffmpeg-python,Python video/audio processing,MIT
Bindings / SDKs,pydub,Python audio wrapper,MIT
Bindings / SDKs,PyMuPDF / pdfminer.six,Python PDF,GPL
Bindings / SDKs,pillow,Python imaging,PIL fork
Bindings / SDKs,"gstreamer-rs, rav1e (Rust)",Media in Rust,BSD/Apache
Bindings / SDKs,"pdfcpu (Go), go-mediainfo",Go toolkits,Apache