Skip to content

Latest commit

 

History

History
397 lines (368 loc) · 22 KB

File metadata and controls

397 lines (368 loc) · 22 KB
# media-stack.yaml  — master spec (taxonomy + ops + libs + routing + tools)

# Spec version. Quoted so it loads as the string "0.1" instead of a float
# (as a float, a later "0.10" would collapse to 0.1).
version: "0.1"

# Reference taxonomy: seventeen numbered buckets. The numeric key prefixes are
# cited elsewhere in this file (e.g. "taxonomy.9", "taxonomy.10").
taxonomy:
  media_buckets:
    # 1-5: what the media is and how it is stored
    1_media_types: [video, audio, images, subtitles, data]
    2_codecs:
      video: [h264, hevc, av1, vp9, prores, theora]
      audio: [aac, opus, mp3, flac, vorbis, pcm_s16le, alac]
    3_encoders_decoders:
      encoders:
        [libx264, libx265, libaom-av1, libvpx-vp9, h264_nvenc, hevc_qsv,
         libopus, libmp3lame, aac]
      decoders: [h264, hevc, av1, vp9, aac, opus, mp3]
    4_containers:
      [mp4, mkv, webm, mov, avi, ts, mp3, m4a, wav, flac, ogg,
       jpg, png, webp, avif, heic, tiff, pdf, svg, ico]
    5_file_extensions_types:
      extensions_examples:
        ['.mp4', '.mkv', '.webm', '.mp3', '.jpg', '.png', '.pdf', '.svg']
      mime_examples:
        ['video/mp4', 'audio/mpeg', 'image/png', 'application/pdf']

    # 6-10: stream properties, processing, delivery, protection, transport
    6_stream_properties:
      video: [resolution, fps, aspect_ratio, color_space, bitrate]
      audio: [sample_rate, channels, bitrate]
    7_processing_operations_fine: [mux, demux, remux, transcode, filter]
    # streaming: HLS/DASH/RTMP/RTSP
    8_delivery_modes: [progressive, streaming]
    9_protections: [none, drm, hdcp, geoblock, signed_url, watermark]
    10_protocols:
      [file, http, https, ftp, rtmp, rtsp, hls, dash, concat, cache,
       bluray, crypto, data, device]

    # 11-17: libraries, detection, manifests, descriptors, filters, tags, devices
    11_ffmpeg_libraries:
      [libavcodec, libavformat, libavfilter, libswscale, libswresample]
    12_extractors_detectors: [yt-dlp_extractors, format_sniffers]
    13_manifests_playlists: ['.m3u8', '.mpd', '.pls', '.asx']
    14_descriptors_identifiers:
      [ID, EXT, PROTO, TBR, FPS, VCODEC, ACODEC, ABR, ASR,
       language, quality_label]
    15_filters_effects:
      video: [scale, crop, fps, overlay, subtitles, drawtext]
      audio: [volume, resample, loudnorm, highpass, lowpass, afftdn]
    16_metadata_tags:
      [id3, vorbis_comments, mp4_atoms, chapters, cover_art, language_tags]
    17_devices_inputs:
      [avfoundation, dshow, alsa, pulse, x11grab, webcams, microphones]

# Coarse, keyword-based verbs; a tool's `operation` field takes one of these.
operations_coarse:
  [convert, compress, upscale, shrink, combine, split,
   download, record, extract, edit, analyze, package]

# Execution backends. These are the values that appear as `engine:` in
# routing_rules (`then.engine` / `defaults.engine`) and in each tool entry.
engines:
  - wasm      # browser/local (e.g., libvips via WASM)
  - vertd     # video/audio via ffmpeg daemon (VERT)
  - external  # outside stacks: LibreOffice, Poppler, etc.
  - unsupported
  - unknown

# Flat top-level enums; tool_template's comments refer to `media_types enum`
# for media.input/media.output.
# NOTE(review): these overlap taxonomy.media_buckets but are not identical.
# `protocols` here omits bluray and crypto (both listed under 10_protocols),
# and `media_types` has different members than 1_media_types. Confirm which
# list the matcher should treat as authoritative.
delivery_modes: [progressive, streaming]
protocols: [file, http, https, ftp, rtmp, rtsp, hls, dash, concat, cache, data, device]
media_types: [video, audio, image_raster, image_vector, document, subtitle, archive, playlist]

# Catalog of tool and library names, grouped by domain (image, av, docs, misc).
# The example tool entries in this file draw their `libraries:` values from
# names listed here — presumably this is the allowed vocabulary; confirm.
libraries_catalog:
  # top-level tools
  tools: [yt-dlp, ffmpeg, ffprobe, VERT, vertd, streamlink, imagemagick, libvips, exiftool, sox]
  # image stacks
  image:
    core: [libvips, imagemagick]
    codecs: [libjpeg-turbo, libpng, libwebp, libavif, libjxl, libtiff, libheif]
    optimize: [pngquant, oxipng, mozjpeg, gifsicle, svgo]
    vector_raster: [librsvg, resvg, cairosvg, potrace, icoutils]
  # audio/video
  av:
    # same names as several taxonomy 3_encoders_decoders.encoders entries
    ffmpeg: [libx264, libx265, libaom-av1, libvpx-vp9, libopus, libmp3lame, aac]
    dsp: [sox, rubberband, libsndfile]
  # docs/ebooks/html
  docs:
    pdf: [poppler, ghostscript, qpdf, pdfcpu]
    office: [libreoffice, unoconv]
    convert_text: [pandoc]
    html_pdf: [wkhtmltopdf, headless_chromium, weasyprint]
    ebooks: [calibre]
  # archives/fonts
  misc:
    archives: [libarchive, p7zip, unrar]
    fonts: [fonttools]

# Each rule pairs an `if` expression string with a `then` mapping that sets
# `engine` and, for some rules, `libraries_add`. The last list entry has a
# different shape: a single `defaults` mapping instead of if/then.
# NOTE(review): the expression grammar is not defined in this file. The strings
# mix `or` / `OR`, `in [...]` / `any of [...]`, and both qualified
# (`media.input`) and bare (`output`, `animated`, `protocol`) names — confirm
# what the matcher accepts.
# NOTE(review): rule precedence is not stated. The example tools look
# consistent with later rules overriding earlier ones (m3u8-download has
# media.input playlist, which matches the `external` rule, yet is declared
# `vertd` as the streaming rule gives) — confirm before reordering anything.
routing_rules:  # compact decision hints for auto-matcher
  - if: "media.input == video or output == video or animated == true"
    then: { engine: vertd, libraries_add: [ffmpeg] }
  - if: "media.input == image_raster and formats.in not in [heic, avif, jxl] and not animated"
    then: { engine: wasm, libraries_add: [libvips] }
  - if: "formats.in any of [heic, avif, jxl] OR requires_advanced_filters == true"
    then: { engine: vertd, libraries_add: [ffmpeg] }
  - if: "media.input in [image_vector, document, archive, playlist]"
    then: { engine: external }
  - if: "delivery.input == streaming or protocol any of [hls, dash, rtmp, rtsp]"
    then: { engine: vertd, libraries_add: [ffmpeg] }
  - if: "protection in [drm]"
    then: { engine: unsupported }
  # fallback engine when no rule sets one (presumably; confirm with matcher)
  - defaults:
      engine: unknown

# Default shape for one tool entry. Anchored as `&tool_template`; entries under
# `tools:` pull it in with the merge key `<<: *tool_template` and override
# individual fields.
# Merge keys are shallow: a tool that sets `media`, `formats` or `delivery`
# replaces that whole nested mapping, so nested defaults below (container_in,
# container_out, codec_in, codec_out, ...) are NOT inherited unless repeated.
tool_template: &tool_template
  id: ""                # slug, e.g., "mp4-to-mp3"
  name: ""              # human title
  operation: ""         # from operations_coarse
  engine: unknown
  libraries: []         # ["yt-dlp", "ffmpeg", "libvips", ...]
  media:
    input: ""           # media_types enum
    output: ""          # media_types enum
  formats:
    in: []              # ["mp4"]
    out: []             # ["mp3"]
    container_in: ""    # optional
    container_out: ""   # optional
    codec_in:           # optional
      video: []
      audio: []
    codec_out:          # optional
      video: []
      audio: []
  delivery:
    input: progressive
    output: progressive
  protection: none      # taxonomy.9
  protocol: [file]      # taxonomy.10
  params: {}            # e.g., { crf: 23, abr: "192k" }
  sample_cmd: ""        # ffmpeg / external example
  notes: ""
  tags: []              # e.g., ["quick","lossless"]

tools:  # a few filled examples using the template
  # Video in, audio out: the sample command drops the video stream (-vn) and
  # encodes the audio with libmp3lame.
  - <<: *tool_template
    id: mp4-to-mp3
    name: MP4 to MP3
    operation: convert
    engine: vertd
    libraries:
      - yt-dlp
      - ffmpeg
      - libmp3lame
    media:
      input: video
      output: audio
    formats:
      in: [mp4]
      out: [mp3]
      container_in: mp4
      container_out: mp3
      codec_in:
        video: [h264]
        audio: [aac]
      codec_out:
        audio: [mp3]
    protocol:
      - file
      - https
    sample_cmd: 'ffmpeg -i in.mp4 -vn -c:a libmp3lame -q:a 2 out.mp3'
    notes: 'extract + transcode audio'

  # Raster-to-raster conversion routed to the wasm engine; no command line.
  - <<: *tool_template
    id: webp-to-png
    name: WEBP to PNG
    operation: convert
    engine: wasm
    libraries:
      - libvips
    media:
      input: image_raster
      output: image_raster
    formats:
      in: [webp]
      out: [png]
    # handled in-browser via libvips
    sample_cmd: ""

  # HEIC input is sent to vertd rather than wasm (see `notes`).
  - <<: *tool_template
    id: heic-to-jpg
    name: HEIC to JPG
    operation: convert
    engine: vertd
    libraries:
      - ffmpeg
      - libheif
      - libjpeg-turbo
    media:
      input: image_raster
      output: image_raster
    formats:
      in: [heic]
      out: [jpg]
    sample_cmd: 'ffmpeg -i in.heic out.jpg'
    notes: 'wasm builds often lack HEIC decode'

  # Video to animated GIF using ffmpeg's two-pass palette filters
  # (palettegen + paletteuse) in a single filtergraph.
  - <<: *tool_template
    id: mp4-to-gif
    name: MP4 to GIF
    operation: convert
    engine: vertd
    libraries: [ffmpeg]
    media: { input: video, output: image_raster }
    formats: { in: [mp4], out: [gif] }
    # `>-` folds the two lines into one command and strips the trailing
    # newline, so the value has the same shape as every other sample_cmd.
    # Folding inserts one space after the `;` between the filter chains.
    sample_cmd: >-
      ffmpeg -i in.mp4 -vf "fps=12,scale=640:-1:flags=lanczos,split[s0][s1];
      [s0]palettegen[p];[s1][p]paletteuse" out.gif

  # Document-to-raster conversion via the external engine (poppler's pdftoppm).
  - <<: *tool_template
    id: pdf-to-png
    name: PDF to PNG
    operation: convert
    engine: external
    libraries:
      - poppler
    media:
      input: document
      output: image_raster
    formats:
      in: [pdf]
      out: [png]
    sample_cmd: 'pdftoppm -png -r 200 in.pdf out'

  # Concatenates several PDFs into one via the external engine (qpdf).
  - <<: *tool_template
    id: merge-pdfs
    name: Combine PDFs
    operation: combine
    engine: external
    libraries:
      - qpdf
    media:
      input: document
      output: document
    formats:
      in: [pdf]
      out: [pdf]
    sample_cmd: 'qpdf --empty --pages a.pdf b.pdf c.pdf -- out.pdf'

  # Streaming playlist in, progressive video file out.
  - <<: *tool_template
    id: m3u8-download
    name: Download HLS (.m3u8)
    operation: download
    engine: vertd
    libraries:
      - yt-dlp
      - ffmpeg
    media:
      input: playlist
      output: video
    formats:
      in: [m3u8]
      out: [mp4]
    delivery:
      input: streaming
      output: progressive
    protocol:
      - https
      - hls
    # single-quoted so the embedded double quotes, `*` and `%` stay literal
    sample_cmd: 'yt-dlp -f "bv*+ba/b" -o "%(title)s.%(ext)s" "<m3u8_or_page_url>"'

  # PNG in, PNG out on the wasm engine; size is reduced by quantization.
  - <<: *tool_template
    id: png-compress
    name: Compress PNG
    operation: compress
    engine: wasm
    libraries:
      - libvips
      - pngquant
    media:
      input: image_raster
      output: image_raster
    formats:
      in: [png]
      out: [png]
    params:
      quality: "60-80"
    notes: 'lossy quantization for size wins'

  # Vector-to-raster conversion via the external engine (librsvg's rsvg-convert).
  - <<: *tool_template
    id: svg-to-png
    name: SVG to PNG
    operation: convert
    engine: external
    libraries:
      - librsvg
    media:
      input: image_vector
      output: image_raster
    formats:
      in: [svg]
      out: [png]
    sample_cmd: 'rsvg-convert -w 1024 -h 1024 in.svg > out.png'

How to use this

  • Start by appending your 1k tools under `tools:`, beginning each entry with `<<: *tool_template`.

  • keep the coarse operations you like (convert/compress/…).

  • let your matcher script read this YAML, apply routing_rules, and:

    • fill engine/libraries if missing,
    • emit tools.json (for code) and tools.csv (for spreadsheets),
    • warn on engine: unknown or protection: drm.
Here’s the same “relevant libraries” list as a **spreadsheet-style table** so you can scan/sort quickly.

---

| Domain                       | Library / Tool                 | Purpose / Capabilities                       | License / Notes               |
| ---------------------------- | ------------------------------ | -------------------------------------------- | ----------------------------- |
| **Containers / Packaging**   | Bento4                         | MP4/HLS/DASH muxing, CMAF, transmux          | Permissive                    |
|                              | GPAC / MP4Box                  | MP4 mux/fragment, DASH/HLS packager          | LGPL                          |
|                              | Shaka Packager                 | DASH/HLS packaging, clear-key, DRM workflows | BSD (DRM licensed separately) |
|                              | MKVToolNix (mkvmerge, mkvinfo) | Matroska mux/split/inspect                   | GPL                           |
| **Video Codecs**             | x264 / x265                    | H.264 / H.265 encoders                       | GPL                           |
|                              | SVT-AV1                        | Fast AV1 encoder (Intel)                     | BSD                           |
|                              | libaom-av1                     | Reference AV1 encoder                        | BSD                           |
|                              | rav1e                          | Rust AV1 encoder                             | Apache                        |
|                              | libvpx-vp9                     | VP9 codec (Google)                           | BSD                           |
|                              | dav1d                          | High-performance AV1 decoder                 | BSD                           |
| **Audio Codecs**             | libopus                        | Opus codec                                   | BSD                           |
|                              | libvorbis                      | Vorbis codec                                 | BSD                           |
|                              | libflac                        | FLAC codec                                   | BSD                           |
|                              | libmp3lame                     | MP3 encoder                                  | LGPL                          |
|                              | fdk-aac                        | High quality AAC encoder                     | License encumbered            |
| **Hardware Accel**           | NVIDIA NVENC/NVDEC             | HW encode/decode                             | Proprietary driver            |
|                              | Intel oneVPL/QSV               | HW encode/decode                             | Permissive                    |
|                              | VAAPI                          | Linux HW accel                               | MIT                           |
|                              | Apple VideoToolbox             | macOS HW accel                               | Proprietary                   |
| **Filters / Quality**        | VapourSynth                    | Scriptable video filter graphs               | MIT                           |
|                              | zimg                           | Colorspace, scaling, HDR→SDR                 | zlib                          |
|                              | libplacebo                     | GPU shaders, tonemapping                     | MIT                           |
|                              | libvmaf                        | Netflix VMAF quality metrics                 | BSD                           |
|                              | RNNoise                        | Neural noise suppression (audio)             | BSD                           |
| **Thumbnails / Waveforms**   | ffmpegthumbnailer              | Fast video thumbnails                        | GPL                           |
|                              | audiowaveform                  | Waveform JSON/PNGs                           | GPL                           |
| **Subtitles / Captions**     | libass                         | ASS/SSA render (burn-in)                     | ISC                           |
|                              | ccextractor                    | Extract CC to SRT                            | GPL                           |
|                              | ffsubsync                      | Auto-sync subs to media                      | MIT                           |
| **Images (Raster)**          | libheif                        | HEIC/HEIF codec                              | LGPL                          |
|                              | libavif                        | AVIF codec                                   | BSD                           |
|                              | libjxl                         | JPEG XL codec                                | BSD                           |
|                              | mozjpeg                        | Optimized JPEG encoder                       | BSD                           |
|                              | pngquant / oxipng              | PNG compress/optimize                        | GPL/MIT                       |
|                              | gifsicle                       | GIF compress/optimize                        | GPL                           |
| **Vector / Icons**           | librsvg / resvg                | SVG→PNG rasterization                        | LGPL/Apache                   |
|                              | SVGO                           | SVG optimizer (Node.js)                      | MIT                           |
|                              | potrace                        | Raster→vector tracing                        | GPL                           |
|                              | icoutils                       | ICO/ICNS packaging                           | GPL                           |
| **Documents / Ebooks / OCR** | Poppler                        | PDF→images/text                              | GPL                           |
|                              | Ghostscript                    | PDF/PostScript processing                    | AGPL                          |
|                              | qpdf                           | PDF merge/split/linearize                    | Apache                        |
|                              | pdfcpu                         | PDF toolkit (Go)                             | Apache                        |
|                              | LibreOffice / unoconv          | Docx/pptx/xlsx↔pdf                           | MPL/LGPL                      |
|                              | Pandoc                         | Universal doc converter                      | GPL                           |
|                              | Calibre                        | Ebook conversions                            | GPL                           |
|                              | Tesseract OCR                  | OCR engine                                   | Apache                        |
|                              | OCRmyPDF                       | Add searchable text layer                    | MIT                           |
| **Download / Ingest**        | aria2                          | Multi-source downloader                      | GPL                           |
|                              | curl / wget                    | Fetch with cookies/headers                   | MIT/GPL                       |
|                              | N\_m3u8DL-RE                   | Robust HLS/DASH downloader                   | GPL                           |
|                              | MediaInfo                      | Media metadata probe                         | BSD                           |
|                              | rclone                         | Cloud storage sync                           | MIT                           |
| **Live / IO / Servers**      | MediaMTX (rtsp-simple-server)  | RTSP/RTMP/WebRTC server                      | Apache                        |
|                              | NGINX RTMP module              | RTMP ingest                                  | BSD                           |
|                              | SRT / RIST                     | Reliable UDP transport                       | MPL/BSD                       |
| **Speech / Transcription**   | whisper.cpp / faster-whisper   | Local transcription (Whisper)                | MIT                           |
|                              | Vosk                           | Offline ASR                                  | Apache                        |
| **Bindings / SDKs**          | sharp (Node)                   | libvips binding                              | LGPL                          |
|                              | fluent-ffmpeg (Node)           | FFmpeg wrapper                               | MIT                           |
|                              | moviepy / ffmpeg-python        | Python video/audio processing                | MIT                           |
|                              | pydub                          | Python audio wrapper                         | MIT                           |
|                              | PyMuPDF / pdfminer.six         | Python PDF                                   | GPL                           |
|                              | pillow                         | Python imaging                               | PIL fork                      |
|                              | gstreamer-rs, rav1e (Rust)     | Media in Rust                                | BSD/Apache                    |
|                              | pdfcpu (Go), go-mediainfo      | Go toolkits                                  | Apache                        |

---

Domain,Library / Tool,Purpose / Capabilities,License / Notes
Containers / Packaging,Bento4,"MP4/HLS/DASH muxing, CMAF, transmux",Permissive
Containers / Packaging,GPAC / MP4Box,"MP4 mux/fragment, DASH/HLS packager",LGPL
Containers / Packaging,Shaka Packager,"DASH/HLS packaging, clear-key, DRM workflows",BSD (DRM separate)
Containers / Packaging,MKVToolNix,Matroska mux/split/inspect,GPL
Video Codecs,x264 / x265,H.264 / H.265 encoders,GPL
Video Codecs,SVT-AV1,Fast AV1 encoder (Intel),BSD
Video Codecs,libaom-av1,Reference AV1 encoder,BSD
Video Codecs,rav1e,Rust AV1 encoder,Apache
Video Codecs,libvpx-vp9,VP9 codec (Google),BSD
Video Codecs,dav1d,High-performance AV1 decoder,BSD
Audio Codecs,libopus,Opus codec,BSD
Audio Codecs,libvorbis,Vorbis codec,BSD
Audio Codecs,libflac,FLAC codec,BSD
Audio Codecs,libmp3lame,MP3 encoder,LGPL
Audio Codecs,fdk-aac,High quality AAC encoder,Encumbered
Hardware Accel,NVIDIA NVENC/NVDEC,HW encode/decode,Proprietary
Hardware Accel,Intel oneVPL/QSV,HW encode/decode,Permissive
Hardware Accel,VAAPI,Linux HW accel,MIT
Hardware Accel,Apple VideoToolbox,macOS HW accel,Proprietary
Filters / Quality,VapourSynth,Scriptable video filter graphs,MIT
Filters / Quality,zimg,"Colorspace, scaling, HDR→SDR",zlib
Filters / Quality,libplacebo,"GPU shaders, tonemapping",MIT
Filters / Quality,libvmaf,Netflix VMAF quality metrics,BSD
Filters / Quality,RNNoise,Neural noise suppression (audio),BSD
Thumbnails / Waveforms,ffmpegthumbnailer,Fast video thumbnails,GPL
Thumbnails / Waveforms,audiowaveform,Waveform JSON/PNGs,GPL
Subtitles / Captions,libass,ASS/SSA render (burn-in),ISC
Subtitles / Captions,ccextractor,Extract CC to SRT,GPL
Subtitles / Captions,ffsubsync,Auto-sync subs to media,MIT
Images (Raster),libheif,HEIC/HEIF codec,LGPL
Images (Raster),libavif,AVIF codec,BSD
Images (Raster),libjxl,JPEG XL codec,BSD
Images (Raster),mozjpeg,Optimized JPEG encoder,BSD
Images (Raster),pngquant / oxipng,PNG compress/optimize,GPL/MIT
Images (Raster),gifsicle,GIF compress/optimize,GPL
Vector / Icons,librsvg / resvg,SVG→PNG rasterization,LGPL/Apache
Vector / Icons,SVGO,SVG optimizer (Node.js),MIT
Vector / Icons,potrace,Raster→vector tracing,GPL
Vector / Icons,icoutils,ICO/ICNS packaging,GPL
Documents / Ebooks / OCR,Poppler,PDF→images/text,GPL
Documents / Ebooks / OCR,Ghostscript,PDF/PostScript processing,AGPL
Documents / Ebooks / OCR,qpdf,PDF merge/split/linearize,Apache
Documents / Ebooks / OCR,pdfcpu,PDF toolkit (Go),Apache
Documents / Ebooks / OCR,LibreOffice / unoconv,Docx/pptx/xlsx↔pdf,MPL/LGPL
Documents / Ebooks / OCR,Pandoc,Universal doc converter,GPL
Documents / Ebooks / OCR,Calibre,Ebook conversions,GPL
Documents / Ebooks / OCR,Tesseract OCR,OCR engine,Apache
Documents / Ebooks / OCR,OCRmyPDF,Add searchable text layer,MIT
Download / Ingest,aria2,Multi-source downloader,GPL
Download / Ingest,curl / wget,Fetch with cookies/headers,MIT/GPL
Download / Ingest,N_m3u8DL-RE,Robust HLS/DASH downloader,GPL
Download / Ingest,MediaInfo,Media metadata probe,BSD
Download / Ingest,rclone,Cloud storage sync,MIT
Live / IO / Servers,MediaMTX,RTSP/RTMP/WebRTC server,Apache
Live / IO / Servers,NGINX RTMP module,RTMP ingest,BSD
Live / IO / Servers,SRT / RIST,Reliable UDP transport,MPL/BSD
Speech / Transcription,whisper.cpp / faster-whisper,Local transcription (Whisper),MIT
Speech / Transcription,Vosk,Offline ASR,Apache
Bindings / SDKs,sharp (Node),libvips binding,LGPL
Bindings / SDKs,fluent-ffmpeg (Node),FFmpeg wrapper,MIT
Bindings / SDKs,moviepy / ffmpeg-python,Python video/audio processing,MIT
Bindings / SDKs,pydub,Python audio wrapper,MIT
Bindings / SDKs,PyMuPDF / pdfminer.six,Python PDF,GPL
Bindings / SDKs,pillow,Python imaging,PIL fork
Bindings / SDKs,"gstreamer-rs, rav1e (Rust)",Media in Rust,BSD/Apache
Bindings / SDKs,"pdfcpu (Go), go-mediainfo",Go toolkits,Apache