Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
42acb77
Added match models.
semohr Apr 14, 2026
67b1194
Added mappers to convert between models and beets objects.
semohr Apr 14, 2026
4fd2ec4
Added migration.
semohr Apr 14, 2026
89233d3
Number of fixes to schema and added data migration.
semohr Apr 14, 2026
30a751a
Added TrackInfoMapper tests.
semohr Apr 16, 2026
adaf73a
Added ER Diagrams to Docs, and a dependency to create them from our S…
pSpitzner Apr 18, 2026
1a67e6e
Moved database schema docs into backend specific docs.
semohr Apr 18, 2026
5826fff
Adjusted readthedocs file
semohr Apr 18, 2026
753bcf3
Added Classes overview to documentation
pSpitzner Apr 18, 2026
a756ba9
More consistent use of BeetsPrefix, added PR279 mermaid
pSpitzner Apr 19, 2026
4b0e945
Added notes on notation.
semohr Apr 19, 2026
5fc236a
Introduced item mapper and adjusted logic to make "new" items table
semohr Apr 19, 2026
030e00e
Fixed test.
semohr Apr 20, 2026
d2fe9d1
Rename search_album to search_name.
semohr Apr 18, 2026
4f5df29
Moved register for pickled files into temp folder.
semohr Apr 18, 2026
5447a04
Removed error handling as this is now caught by beets and should not
semohr Apr 18, 2026
23d4dff
Import changed for show_change.
semohr Apr 18, 2026
efc1d97
Fixed a few issues with our tag_albums caching for tests.
semohr Apr 18, 2026
ce4acb6
Beets update to 2.6.1
semohr Apr 18, 2026
08b3d36
Removed now unused imports
semohr Apr 18, 2026
0743658
Seems like a straight forward upgrade.
semohr Apr 19, 2026
e63699d
Incremented beets version to 2.8.0
semohr May 20, 2026
86378a8
Added new item reference in TrackMatch to our database mapper logic.
semohr May 20, 2026
ff8b776
Added alembic migration
semohr May 20, 2026
6d21086
Fixed typing errors as beets 2.8.0 is a bit stricter
semohr May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- We now use `uv` (Universal Virtualenv) to manage python dependencies and run scripts in CI/CD. This should improve dependency resolution and installation times.
- We now ship a static ffmpeg binary instead of installing ffmpeg via apt. This should reduce image size and improve compatibility across different host systems.
- Added a database migration setup using [Alembic](https://alembic.sqlalchemy.org/) for future database migrations.
- Upgraded `beets` from `v2.5.1` to `v2.6.1`

## [1.2.0] - 25-12-17

Expand Down
372 changes: 372 additions & 0 deletions backend/alembic/versions/2026_04_12_2038-f06e470b3d1e_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,372 @@
"""match

Revision ID: f06e470b3d1e
Revises: 925cf8989fbc
Create Date: 2026-04-12 20:38:28.263069

README:
Historically, candidate states included a pickled match item. This approach has proven
to be brittle and difficult to maintain. This migration implements a more refined
database schema for matches.
"""

from __future__ import annotations
from collections.abc import Sequence
import importlib.util
import io
from pathlib import Path
import pickle
from typing import Any, NamedTuple

import sqlalchemy as sa
from sqlalchemy.orm import Session
from beets_flask.logger import logging
from beets_flask.database.models import types
from alembic import op

# The stub classes defined by the previous migration are reused by the
# unpickler below. Migration file names start with a date and contain dashes,
# so they cannot be imported by name and are loaded from their file path.
BASE_DIR = Path(__file__).resolve().parent
path = BASE_DIR / "2026_04_12_1847-925cf8989fbc_item_pending.py"
spec = importlib.util.spec_from_file_location("item_pending_migration", path)
if spec is None or spec.loader is None:
    # Fail with enough context to see which file could not be loaded.
    raise ImportError(
        f"Could not load the item_pending migration from {path}",
        name="item_pending_migration",
        path=str(path),
    )
item_migration = importlib.util.module_from_spec(spec)
spec.loader.exec_module(item_migration)

# revision identifiers, used by Alembic.
revision: str = "f06e470b3d1e"
# Parent revision; matches the id in the "item_pending" migration file name.
down_revision: str | Sequence[str] | None = "925cf8989fbc"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


# Progress and error messages of this migration are emitted through the
# logger named after Alembic's runtime migration module.
log = logging.getLogger("alembic.runtime.migration")


def upgrade() -> None:
    """Create the match schema and move ``candidate`` rows onto it.

    Tables are created in dependency order so that every foreign key points
    at a table that already exists. Afterwards ``migrate_data()`` converts
    the pickled ``candidate.match`` blobs, and the blob column is replaced
    by a ``candidate.match_id`` foreign key into ``matches``.
    """

    def pk(**extra: Any) -> sa.Column:
        """Return a fresh string ``id`` primary-key column."""
        return sa.Column("id", sa.String(), primary_key=True, **extra)

    def fk(name: str, target: str, **extra: Any) -> sa.Column:
        """Return a fresh string column referencing ``target``."""
        return sa.Column(name, sa.String(), sa.ForeignKey(target), **extra)

    def required(name: str, column_type: Any) -> sa.Column:
        """Return a fresh NOT NULL column of the given type."""
        return sa.Column(name, column_type, nullable=False)

    def timestamps() -> tuple[sa.Column, sa.Column]:
        """Return fresh ``created_at`` / ``updated_at`` columns."""
        return (
            required("created_at", sa.DateTime()),
            required("updated_at", sa.DateTime()),
        )

    # --- core info tables -------------------------------------------------
    op.create_table(
        "album_info",
        required("data", sa.JSON()),
        pk(),
        *timestamps(),
    )
    op.create_index("ix_album_info_created_at", "album_info", ["created_at"])

    op.create_table(
        "track_info",
        fk("album_id", "album_info.id"),
        required("data", sa.JSON()),
        pk(),
        *timestamps(),
    )
    op.create_index("ix_track_info_created_at", "track_info", ["created_at"])

    # --- distance graph (self-referencing through parent_distance_id) -----
    op.create_table(
        "distances",
        fk("track_info_id", "track_info.id"),
        fk("parent_distance_id", "distances.id"),
        required("raw_distance", sa.Float()),
        required("max_distance", sa.Float()),
        pk(),
        *timestamps(),
    )
    op.create_index("ix_distances_created_at", "distances", ["created_at"])

    # --- matches: one base table plus one table per match type ------------
    op.create_table(
        "matches",
        pk(),
        required("type", sa.String()),
        fk("distance_id", "distances.id", nullable=False),
        *timestamps(),
    )
    op.create_table(
        "matches_album",
        fk("id", "matches.id", primary_key=True),
        fk("info_id", "album_info.id", nullable=False),
    )
    op.create_table(
        "matches_track",
        fk("id", "matches.id", primary_key=True),
        fk("info_id", "track_info.id", nullable=False),
    )
    op.create_index("ix_matches_created_at", "matches", ["created_at"])

    # --- item <-> track mapping of an album match -------------------------
    op.create_table(
        "album_match_track_mappings",
        fk("album_match_id", "matches_album.id", nullable=False),
        fk("track_info_id", "track_info.id"),
        fk("item_id", "items.id"),
        pk(nullable=False),
        *timestamps(),
    )
    op.create_index(
        "ix_album_match_track_mappings_created_at",
        "album_match_track_mappings",
        ["created_at"],
    )

    # --- penalties attached to a distance ---------------------------------
    op.create_table(
        "penalties",
        required("key", sa.String()),
        required("value", types.FloatListType()),
        fk("distance_id", "distances.id", nullable=False),
        pk(),
        *timestamps(),
    )
    op.create_index("ix_penalties_created_at", "penalties", ["created_at"])
    op.create_index("ix_penalties_key", "penalties", ["key"])

    # --- candidate table ---------------------------------------------------
    # Step 1: add the reference as nullable so existing rows stay valid
    # while their data is being converted.
    with op.batch_alter_table("candidate") as batch_op:
        batch_op.add_column(sa.Column("match_id", sa.String(), nullable=True))

    # Step 2: turn every pickled blob into rows of the new tables.
    migrate_data()

    # Step 3: remove the blob and enforce the new reference.
    # NOTE(review): migrate_data() skips candidates whose blob is NULL or
    # empty, leaving their match_id NULL - confirm such rows cannot exist
    # before NOT NULL is applied here.
    with op.batch_alter_table("candidate") as batch_op:
        batch_op.drop_column("match")
        batch_op.alter_column("match_id", nullable=False)
        batch_op.create_foreign_key(
            "fk_candidate_match",
            "matches",
            ["match_id"],
            ["id"],
        )


def downgrade() -> None:
    """Downgrade schema.

    Detaches ``candidate`` from the match tables, re-adds the ``match`` blob
    column and drops every table created by ``upgrade()``.

    The pickled blobs are not rebuilt: the re-added ``match`` column is
    nullable and left empty, so the migrated match data is discarded.
    """

    # candidate table (SQLite-safe)
    with op.batch_alter_table("candidate") as batch_op:
        batch_op.drop_constraint(
            "fk_candidate_match",
            type_="foreignkey",
        )
        batch_op.add_column(sa.Column("match", sa.BLOB(), nullable=True))
        batch_op.drop_column("match_id")

    # Drop referencing tables before the tables they point at, otherwise the
    # drop fails on databases that enforce foreign keys.
    op.drop_table("album_match_track_mappings")  # -> matches_album, track_info
    op.drop_table("matches_track")  # -> matches, track_info
    op.drop_table("matches_album")  # -> matches, album_info

    op.drop_table("penalties")  # -> distances
    op.drop_table("matches")  # -> distances
    op.drop_table("distances")  # -> track_info
    op.drop_table("track_info")  # -> album_info
    op.drop_table("album_info")


def migrate_data() -> None:
    """Convert every pickled ``candidate.match`` blob into match rows.

    Each blob is unpickled into stub objects, mapped to the new database
    models, flushed to obtain its id, and that id is written to
    ``candidate.match_id``. Rows with an empty blob are skipped and keep
    ``match_id`` NULL.

    Raises:
        Exception: whatever unpickling, mapping or the database raises for a
            candidate. The failure is logged with the candidate id and
            re-raised, which aborts the migration.
    """
    from beets_flask.database.mapper.match import (
        AlbumMatchMapper,
        TrackMatchMapper,
        Context,
    )

    conn = op.get_bind()
    # The session shares the migration's connection, so flushed rows are
    # written through the same connection as the UPDATE statements below.
    session = Session(bind=conn)

    # Count first so progress messages can show the overall amount of work.
    total = conn.execute(
        sa.text("SELECT COUNT(*) FROM candidate WHERE match IS NOT NULL")
    ).scalar()
    # Stream the blobs instead of loading all of them into memory at once.
    result = conn.execution_options(stream_results=True).execute(
        sa.text("SELECT id, match FROM candidate WHERE match IS NOT NULL")
    )

    migrated = 0
    for i, row in enumerate(result, start=1):
        if i % 100 == 0:
            log.info("Migrating matches %d / %d rows", i, total)

        candidate_id = row[0]
        match_blob = row[1]

        if not match_blob:
            # An empty (but non-NULL) blob cannot be unpickled.
            log.warning("Skipping candidate %s: empty match blob", candidate_id)
            continue

        try:
            beets_match = load_match(match_blob)

            # A bit of an anti-pattern here, but the easiest way out:
            # we depend on our mappers and hope they do not change in the
            # future.
            db_match: Any
            if isinstance(beets_match, AlbumMatchStub):
                db_match = AlbumMatchMapper().from_beets(
                    beets_match,  # type: ignore[arg-type]
                    Context(),
                )
            else:
                db_match = TrackMatchMapper().from_beets(
                    beets_match,  # type: ignore[arg-type]
                    Context(),
                )

            session.add(db_match)
            session.flush()  # gets db_match.id

            conn.execute(
                sa.text("UPDATE candidate SET match_id = :match_id WHERE id = :id"),
                {"match_id": db_match.id, "id": candidate_id},
            )

        except Exception:
            log.exception("Failed to migrate candidate %s", candidate_id)
            raise

        migrated += 1

    # Report what was actually converted; skipped rows are not counted.
    log.info("Migrated %d / %d matches!", migrated, total)


def load_match(blob: bytes) -> AlbumMatchStub | TrackMatchStub:
    """Deserialize one pickled match blob into stub objects.

    The bytes are read through ``MatchUnpickler`` rather than plain
    ``pickle.loads`` so that class references are resolved by its
    ``find_class`` override.
    """
    # NOTE(review): this still unpickles stored data - it presumably relies
    # on the blobs having been written by this application itself.
    stream = io.BytesIO(blob)
    unpickler = MatchUnpickler(stream)
    return unpickler.load()


# --------------------------- Mocked Beets Classes --------------------------- #


class AttributeDictStub:
    """Plain object whose attributes can also be accessed like dict items.

    All values live in the instance ``__dict__``; attribute access, item
    access and the ``keys``/``values``/``items`` views read and write that
    same mapping.
    """

    def __init__(self, **kwargs):
        """Store every keyword argument as an attribute."""
        vars(self).update(kwargs)

    def __getstate__(self):
        """Return a shallow copy of the attributes for pickling."""
        return dict(vars(self))

    def __setstate__(self, state):
        """Merge a pickled attribute mapping into this instance."""
        vars(self).update(state)

    def __setitem__(self, key, value):
        """Set ``key`` via item syntax; it becomes an attribute as well."""
        vars(self)[key] = value

    def __getitem__(self, key):
        """Return the value stored under ``key`` (``KeyError`` if absent)."""
        return vars(self)[key]

    def keys(self):
        """Return a view of the attribute names."""
        return vars(self).keys()

    def values(self):
        """Return a view of the attribute values."""
        return vars(self).values()

    def items(self):
        """Return a view of the ``(name, value)`` pairs."""
        return vars(self).items()


class DistanceStub:
    """Stand-in for a pickled distance object.

    Holds a penalties mapping, a mapping of per-track child distances and
    two floats. Every field missing from a pickled state falls back to an
    empty dict or ``0.0``.
    """

    def __init__(self):
        # An empty state yields exactly the defaults: two fresh dicts and
        # two zero floats.
        self.__setstate__({})

    @property
    def raw_distance(self) -> float:
        """Raw distance value, stored in ``_raw_distance``."""
        return self._raw_distance

    @raw_distance.setter
    def raw_distance(self, value: float):
        self._raw_distance = value

    @property
    def max_distance(self) -> float:
        """Maximum distance value, stored in ``_max_distance``."""
        return self._max_distance

    @max_distance.setter
    def max_distance(self, value: float):
        self._max_distance = value

    def __getstate__(self):
        """Return the four state fields as a dict for pickling."""
        field_names = ("_penalties", "tracks", "_raw_distance", "_max_distance")
        return {field: getattr(self, field) for field in field_names}

    def __setstate__(self, state):
        """Restore the fields from ``state``, defaulting any that are absent."""
        # The defaults are built on every call so instances never share the
        # fallback dicts.
        defaults = (
            ("_penalties", {}),
            ("tracks", {}),
            ("_raw_distance", 0.0),
            ("_max_distance", 0.0),
        )
        for field, fallback in defaults:
            setattr(self, field, state.get(field, fallback))


class AlbumMatchStub(NamedTuple):
    """Stand-in for a pickled ``beets.autotag.hooks.AlbumMatch``.

    ``MatchUnpickler.CLASS_MAP`` substitutes this tuple for the beets class,
    and ``migrate_data()`` uses it to recognise album matches.
    """

    # Distance of the whole album match.
    distance: DistanceStub
    # Album metadata, unpickled into an attribute dict.
    info: AttributeDictStub
    # presumably maps each library item to the track info it was matched with
    mapping: dict[Any, AttributeDictStub]  # Any = item_migration.ModelStub
    extra_items: list[Any]
    extra_tracks: list[AttributeDictStub]


class TrackMatchStub(NamedTuple):
    """Two-field counterpart of ``AlbumMatchStub`` for a single track.

    ``load_match()`` may return it, and ``migrate_data()`` hands every match
    that is not an ``AlbumMatchStub`` to the track mapper.
    """

    # Distance of the track match.
    distance: DistanceStub
    # Track metadata, unpickled into an attribute dict.
    info: AttributeDictStub


class MatchUnpickler(pickle.Unpickler):
    """Unpickler that rebuilds pickled beets matches from local stub classes.

    Class references stored in the pickle are never imported. They are looked
    up in ``CLASS_MAP`` and replaced by the stubs of this migration (and of
    the item migration it builds on), so old blobs can be read without the
    beets version that wrote them.
    """

    # (module, qualified name) as stored in the pickle -> replacement class.
    # Several module paths map to the same stub because the blobs were
    # written with different module locations for the same class.
    CLASS_MAP = {
        ("beets.dbcore.db", "LazyConvertDict"): item_migration.LazyConvertDictStub,
        ("beets.library", "Item"): item_migration.ModelStub,
        ("beets.library.models", "Item"): item_migration.ModelStub,
        ("beets.autotag.hooks", "AlbumMatch"): AlbumMatchStub,
        # Without this entry a pickled track match falls back to ``dict``.
        ("beets.autotag.hooks", "TrackMatch"): TrackMatchStub,
        ("beets.autotag.hooks", "Distance"): DistanceStub,
        ("beets.autotag.hooks", "TrackInfo"): AttributeDictStub,
        ("beets.autotag.hooks", "AlbumInfo"): AttributeDictStub,
        ("beets.autotag.distance", "Distance"): DistanceStub,
        ("beetsplug.discogs", "IntermediateTrackInfo"): AttributeDictStub,
    }

    def find_class(self, module, name):
        """Resolve a pickled class reference to its stub.

        Returns the mapped stub class, or ``dict`` (after logging a warning)
        when the reference is not listed in ``CLASS_MAP``.
        """
        key = (module, name)
        try:
            return self.CLASS_MAP[key]
        except KeyError:
            log.warning(
                "Unknown class not in migration map: %s.%s", module, name
            )
            return dict  # Fallback for unknown classes

    def load(self) -> Any:
        """Unpickle the stream and normalise any distance it contains."""
        loaded = super().load()
        if isinstance(loaded, DistanceStub):
            self._normalize(loaded)
        elif isinstance(loaded, (AlbumMatchStub, TrackMatchStub)):
            # Both match kinds carry their distance in ``.distance``.
            self._normalize(loaded.distance)

        return loaded

    def _normalize(self, obj):
        """Normalise ``obj`` if it is a distance; return it unchanged otherwise."""
        if isinstance(obj, DistanceStub):
            return self._normalize_distance(obj)
        return obj

    def _normalize_distance(self, distance: DistanceStub) -> DistanceStub:
        """Rename the legacy ``source`` penalty, recursing into child distances."""
        # Beets had a rename at some point which we need to handle here.
        if "source" in distance._penalties:
            distance._penalties["data_source"] = distance._penalties.pop("source")

        for child in distance.tracks.values():
            self._normalize_distance(child)

        return distance
Loading
Loading