From 8c8de510104966d55a32ad8b040830d0d48cf034 Mon Sep 17 00:00:00 2001 From: 0-th Date: Mon, 7 Apr 2025 02:36:59 +0100 Subject: [PATCH 1/6] feat: add py-ipld-car library --- README.md | 3 ++- python/pyproject.toml | 3 ++- python/tests/test_car.py | 45 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 python/tests/test_car.py diff --git a/README.md b/README.md index 28b437d..24bc0d2 100644 --- a/README.md +++ b/README.md @@ -68,9 +68,10 @@ Fixtures are tested against the [libipld](https://github.com/ipld/libipld) stack ### Python Fixtures are tested against the [ipld-dag-pb](https://github.com/storacha/py-ipld-dag-pb) -library: +and [ipld-car](https://github.com/storacha/py-ipld-car) library: * DAG-PB: [ipld-dag-pb](https://pypi.org/project/ipld-dag-pb/) +* CAR: [ipld-car](https://pypi.org/project/ipld-car) ## Running tests diff --git a/python/pyproject.toml b/python/pyproject.toml index 52db23d..9390bcd 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -13,5 +13,6 @@ classifiers = [ dependencies = [ "multiformats", "ipld-dag-pb", - "pytest" + "pytest", + "ipld-car", ] diff --git a/python/tests/test_car.py b/python/tests/test_car.py new file mode 100644 index 0000000..87b4477 --- /dev/null +++ b/python/tests/test_car.py @@ -0,0 +1,45 @@ + +from typing import TypeAlias +from multiformats.varint import decode +import pytest +from pathlib import Path +from multiformats import CID +import ipld_car + +FIXTURES_DIR = Path(__file__).parents[2] / "fixtures" +REPO_ROOT = FIXTURES_DIR.parent + + +def load_all_fixtures() -> list[ipld_car.Block]: + """Load all non-negative fixtures CID and data""" + fixture_blocks: list[ipld_car.Block] = [] + + for dir in sorted(FIXTURES_DIR.iterdir()): + if dir.is_dir(): # skip .gitattributes file + for file in dir.iterdir(): + fixture_cid = CID.decode(file.stem) + with file.open(mode="rb") as file_obj: + fixture_data = file_obj.read() + 
fixture_blocks.append((fixture_cid, fixture_data,)) + + return fixture_blocks + + +@pytest.fixture +def car_fixture_data() -> bytes: + """Load CAR fixture data from CAR file""" + with open(REPO_ROOT / "fixtures.car", mode="rb") as f: + return f.read() + + +@pytest.mark.parametrize("fixture_block", load_all_fixtures()) +def test_car_decode(fixture_block, car_fixture_data): + decoded_roots, decoded_blocks = ipld_car.decode(car_fixture_data) + assert decoded_roots == [] + assert len(decoded_blocks) == 273 + assert fixture_block in decoded_blocks + + +def test_car_encode(car_fixture_data): + encoded_car = ipld_car.encode(roots=[], blocks=load_all_fixtures()) + assert encoded_car == car_fixture_data From b1aedbfeebdb372cc5804791a13add767f94d764 Mon Sep 17 00:00:00 2001 From: 0-th Date: Mon, 7 Apr 2025 02:42:14 +0100 Subject: [PATCH 2/6] fix: add car library to requirements.txt --- python/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/requirements.txt b/python/requirements.txt index 83414b0..de8faea 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -8,6 +8,8 @@ bases==0.3.0 # via multiformats iniconfig==2.1.0 # via pytest +ipld-car==0.0.1 + # via py-ipld-dag-pb-test (pyproject.toml) ipld-dag-pb==0.0.1 # via py-ipld-dag-pb-test (pyproject.toml) multiformats==0.3.1.post4 From 1aee9debb0eac132b63d4e59e0511d66e1717e66 Mon Sep 17 00:00:00 2001 From: 0-th Date: Thu, 10 Apr 2025 01:53:37 +0100 Subject: [PATCH 3/6] fix: test of car encoding --- python/tests/test_car.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/python/tests/test_car.py b/python/tests/test_car.py index 87b4477..4e9ef47 100644 --- a/python/tests/test_car.py +++ b/python/tests/test_car.py @@ -1,6 +1,3 @@ - -from typing import TypeAlias -from multiformats.varint import decode import pytest from pathlib import Path from multiformats import CID @@ -27,7 +24,7 @@ def load_all_fixtures() -> list[ipld_car.Block]: 
@pytest.fixture def car_fixture_data() -> bytes: - """Load CAR fixture data from CAR file""" + """Load CAR fixture data from CAR file: `fixtures.car`""" with open(REPO_ROOT / "fixtures.car", mode="rb") as f: return f.read() @@ -41,5 +38,23 @@ def test_car_decode(fixture_block, car_fixture_data): def test_car_encode(car_fixture_data): - encoded_car = ipld_car.encode(roots=[], blocks=load_all_fixtures()) - assert encoded_car == car_fixture_data + # encoded blocks from the fixtures dir into CAR + fixture_blocks = load_all_fixtures() + encoded_car = ipld_car.encode(roots=[], blocks=fixture_blocks) + + decoded_car_fixture_blocks_root, decoded_car_fixture_blocks = ipld_car.decode(encoded_car) + decoded_car_fixture_file_root, decoded_car_fixture_file_data = ipld_car.decode(car_fixture_data) + + assert decoded_car_fixture_blocks_root == decoded_car_fixture_file_root + + # verify same blocks are present in both + assert len(decoded_car_fixture_blocks) == len(decoded_car_fixture_file_data) + assert set(block[0] for block in decoded_car_fixture_blocks) == set(block[0] for block in decoded_car_fixture_file_data) + + # verify content by CID + encoded_blocks_dict = {block[0]: block[1] for block in decoded_car_fixture_blocks} + fixture_data_dict = {block[0]: block[1] for block in decoded_car_fixture_file_data} + + for cid, data in fixture_data_dict.items(): + assert cid in encoded_blocks_dict + assert encoded_blocks_dict[cid] == data From ec40ffffb2f856b03bbe50ce4cc59b9fda5c94d0 Mon Sep 17 00:00:00 2001 From: 0-th Date: Thu, 10 Apr 2025 02:22:28 +0100 Subject: [PATCH 4/6] chore: add .gitignore to python dir to ignore python-specific artifacts from vcs --- python/.gitignore | 162 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 python/.gitignore diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000..82f9275 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL 
files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ From 8fdcde7a33cded1d4b82fe4f7612eca881f0ac76 Mon Sep 17 00:00:00 2001 From: 0-th Date: Thu, 10 Apr 2025 02:25:27 +0100 Subject: [PATCH 5/6] fix: correct package metadata information to prevent build system errors --- python/pyproject.toml | 4 +++- python/tests/test_car.py | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 9390bcd..035f736 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "py-ipld-dag-pb-test" +name = "python" version = "0.0.1" requires-python = ">=3.10" classifiers = [ @@ -16,3 +16,5 @@ dependencies = [ "pytest", "ipld-car", ] +[tool.hatch.build.targets.wheel] +packages = ["python"] diff --git a/python/tests/test_car.py b/python/tests/test_car.py index 4e9ef47..6d94f32 100644 --- a/python/tests/test_car.py +++ b/python/tests/test_car.py @@ -40,20 +40,28 @@ def test_car_decode(fixture_block, car_fixture_data): def test_car_encode(car_fixture_data): # encoded blocks from the fixtures dir into CAR fixture_blocks = load_all_fixtures() - encoded_car = ipld_car.encode(roots=[], blocks=fixture_blocks) + encoded_fixture_blocks_car = ipld_car.encode(roots=[], blocks=fixture_blocks) - decoded_car_fixture_blocks_root, decoded_car_fixture_blocks = ipld_car.decode(encoded_car) - decoded_car_fixture_file_root, decoded_car_fixture_file_data = ipld_car.decode(car_fixture_data) + decoded_car_fixture_blocks_root, decoded_car_fixture_blocks = ipld_car.decode( + encoded_fixture_blocks_car + ) + decoded_car_fixture_file_root, decoded_car_fixture_file = ipld_car.decode( + car_fixture_data + ) assert decoded_car_fixture_blocks_root == decoded_car_fixture_file_root - # verify same blocks are present in both - assert len(decoded_car_fixture_blocks) == len(decoded_car_fixture_file_data) - assert set(block[0] for block in decoded_car_fixture_blocks) == set(block[0] for 
block in decoded_car_fixture_file_data) + # verify same blocks are present in encoded-then-decoded-fixture-blocks + # and decoded car fixture file + assert len(decoded_car_fixture_blocks) == len(decoded_car_fixture_file) + assert ( + set(block[0] for block in decoded_car_fixture_blocks) == + set(block[0] for block in decoded_car_fixture_file) + ) # verify content by CID encoded_blocks_dict = {block[0]: block[1] for block in decoded_car_fixture_blocks} - fixture_data_dict = {block[0]: block[1] for block in decoded_car_fixture_file_data} + fixture_data_dict = {block[0]: block[1] for block in decoded_car_fixture_file} for cid, data in fixture_data_dict.items(): assert cid in encoded_blocks_dict From dd4b06048212caed7bbcda9f86bcfd4b2238844c Mon Sep 17 00:00:00 2001 From: 0-th Date: Tue, 17 Mar 2026 09:26:59 +0100 Subject: [PATCH 6/6] fix: order blocks before CAR fixture creation to enable comparison during tests --- fixtures.car | Bin 273018 -> 273018 bytes js/make-car.js | 11 +++++++++-- python/tests/test_car.py | 39 +++++++++------------------------------ 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/fixtures.car b/fixtures.car index 08e36bca2eee9136d0e6886aa7811d58ce685acb..2edf1d7d62b0bda36cec6ff9427dfd1b6836c973 100644 GIT binary patch delta 1302 zcmWktZA_C_6wZC$3*xF$+Cu3kSmbTrw^9(R6xg893bgdYprTnoCz&p_3JQw4xv7g9 zHj$*BY)3a`b8}0SWG23m>!K4)M7KCYrY6qsIkxELjBay&{m@YAXt#;YEevIPh<%z|ibC2aA(rf!>_PQVf@gwf(gSR1Iv;1qfec27I67Nj`|+c%dC zzM}Se=MnfJI8*)Q$u&KbwhPPkuvl=zZ`pD5=TQ%ClBo=ylGni!^+1{w!~OyqhBYAx zCfBcq8(|MB3YA#zfG8R8o}DUS4|`xMuWzwZ5A^56*=3z*HNaQ1)dV;06b~ zs*SOKT7~rn_IOlyp~nF;U$@InU8yz~ZZPrTzpMQaD~v%ZD~5Hv%Q`*6&l*^1h_l_p z`GWZ%j&{G?SjTsSmlRDu{$b(u7^{5A8QG+@%zE4npa@`SFa<6zl9oJbn z0q6J#Q&x$?dbm6;0`1~L7}Uqvd$r;{J>-j$8ZFle=(TeaYdF=uDj#mnr3#)kauyvg zNPP}=8>tho=W?vUDYY1KQVDGi{^Xg&8NAfRU}40=ZXOlD0MAJ=9Eo_~TcKDDoZW0- z!^_B@It)l$xIjp_o`It_>Vlqz#jvl@gPMFgh*qv1-sURcX$M`z99P9W4LkWzY}2X> zjCa!TFDoDRBDtW)4{UsGw}TTX$>qC;Ii*wnI6UQ0!xr8GiI%GB+Xp7X!J0bt`%n4| 
zZ0E1!$^=wQ)P)9_&H#nmy?@N^avMkRS5HsroY0_);U$?n>JJv-*~vTy%e191i3Aay z^aN{=NQD7L8fBd4qC=<~o7_$-a3GgXgFhO2*`asIRulXv?qUzNh#PXjm6d?Ywaa08 zP7;O;N!U}EVDDFuJTsoOQ#Y&#mSTGzoe2dQEf5KpLsxZ#&0j>?H84j?Kogk@pN8D< zu`YrC8EGXOGziB{_;4~UV#x=DW~Rzz_^AFH)Yz$v*?H8hE~*qomS`4FnPF*Tr5aA( zLEvR@w)(j3J0lBM2+tO>b?uwV3!!!XCdOfxN0PhYcI;Y`&Wd?C}RIDag9 z{T8loC^{9MPHASR@+tq^>$8(6jCpuKrfxiFqGc>~k&sMS5qp*8H3=6r_`pJo)aFC? zjnGrF3Le(9f*MvZAyGv=>ULASGPdpZjGIJloiLH2YNI7ELfp8T*Ha(5vC6{Fy3Y;U zqpe~!zl$3+APYXw)+|-4ZEMG=OH1z0?A~4u>=*g^fRYVSpMd%jA`V)$r%PWFJ4; zl>y5Jh#lO1Vt_A?QJZWSnL)&fEp!jziGxLO+Cr;%MOP6N4k3tLD1z==v`StV_EKTk z#}X89H%zi1(8lukLw7MeNuqW9??5qx^sElj!lW1;*=Zfr=n#1#nEYnMAyo;i+t0w z_>zDFf+1GI{mNYM`LX|<9c&rzW7(iNOx}hmKIY=(${eoha>Aly>fy6lHZP1Xgo0z# z#rxyl1Rf?9SZJYHKDCK|kZOgTCi<>#Wf_0AVm)Z|EXX&o1^h3^S~y-qPjIbrHu$T^ z5;#9VcJr{q01-Pi^K)Gi%!p7ESCmOGTB6f<7J_CUGx5a*5_F*g_&}!w4O?gq{8L3F zct1*vaNUn6hWmz~4pDZL8KK>bC_Th%%?=61RgBl? zLmIL1DcLkyzTfviDRx^~cAR{&z@Qa}hv+K3@K=*@Dsi!mZB_}nj(nITMueGC$6MV7 zxE!IGP@RJ3+a1Mi+{IZ|<=_%#`7!iM1%}SVOmJ|3OoFBqyuKtC%Y^P8k_kIPn4BDt zV5y5ug}7>5%kx||mFKod;B7_ltzUxV76dPF`wv^0onN_NfF~6!14adi3BFp-EO6C@ z@49m_xfm6(?H0Ae%gtCPe*o+6AGVe4LEQRdjt?fcvbVrfg|#bd(Q3@tYk!Pd`3-+2 z7%eEX-%CxfwHARhAGs{l3Z4+syP|9wjCYY!;y@)+66g-2wn|FTbu>Dp{k)e>gx7|x zr6Uc0Zb~&-1X3FNgl}@p1rz!R_r~{$1)~(%1Xh&bXQxrOU<;t3EDYf(r=8z2rw|jFK~Or-w`dRRmS}X}%F`F0{Y8FjhTSfztIB7@!5`{y*YY96ta6 diff --git a/js/make-car.js b/js/make-car.js index f56d12f..2ad5d65 100644 --- a/js/make-car.js +++ b/js/make-car.js @@ -10,13 +10,20 @@ const outStream = createWriteStream(outFile) const { writer, out } = await CarWriter.create([]) const pipe = pipeline(out, outStream) +const blocks = [] for (const { name, url } of fixtureDirectories()) { const data = await loadFixture(url) for (const { cid, bytes } of Object.values(data)) { - await writer.put({ cid: CID.parse(cid), bytes }) + blocks.push({ cid, bytes }) } } +blocks.sort((a, b) => a.cid < b.cid ? -1 : a.cid > b.cid ? 
1 : 0) + +for (const { cid, bytes } of blocks) { + await writer.put({ cid: CID.parse(cid), bytes }) +} + await writer.close() await pipe -console.log(`Wrote fixtures to ${outFile}`) \ No newline at end of file +console.log(`Wrote fixtures to ${outFile}`) diff --git a/python/tests/test_car.py b/python/tests/test_car.py index 6d94f32..003488b 100644 --- a/python/tests/test_car.py +++ b/python/tests/test_car.py @@ -14,10 +14,10 @@ def load_all_fixtures() -> list[ipld_car.Block]: for dir in sorted(FIXTURES_DIR.iterdir()): if dir.is_dir(): # skip .gitattributes file for file in dir.iterdir(): - fixture_cid = CID.decode(file.stem) + fixture_cid = file.stem with file.open(mode="rb") as file_obj: fixture_data = file_obj.read() - fixture_blocks.append((fixture_cid, fixture_data,)) + fixture_blocks.append((CID.decode(fixture_cid), fixture_data,)) return fixture_blocks @@ -29,40 +29,19 @@ def car_fixture_data() -> bytes: return f.read() -@pytest.mark.parametrize("fixture_block", load_all_fixtures()) -def test_car_decode(fixture_block, car_fixture_data): +def test_car_decode(car_fixture_data): decoded_roots, decoded_blocks = ipld_car.decode(car_fixture_data) assert decoded_roots == [] - assert len(decoded_blocks) == 273 - assert fixture_block in decoded_blocks + assert ( + sorted([(block[0].encode(), block[1],) for block in load_all_fixtures()]) == + sorted([(block[0].encode(), block[1],) for block in decoded_blocks]) + ) def test_car_encode(car_fixture_data): # encoded blocks from the fixtures dir into CAR - fixture_blocks = load_all_fixtures() + fixture_blocks = sorted(load_all_fixtures(), key=lambda block: block[0].encode()) encoded_fixture_blocks_car = ipld_car.encode(roots=[], blocks=fixture_blocks) - - decoded_car_fixture_blocks_root, decoded_car_fixture_blocks = ipld_car.decode( - encoded_fixture_blocks_car - ) - decoded_car_fixture_file_root, decoded_car_fixture_file = ipld_car.decode( - car_fixture_data - ) - - assert decoded_car_fixture_blocks_root == 
decoded_car_fixture_file_root - + assert encoded_fixture_blocks_car == car_fixture_data # verify same blocks are present in encoded-then-decoded-fixture-blocks # and decoded car fixture file - assert len(decoded_car_fixture_blocks) == len(decoded_car_fixture_file) - assert ( - set(block[0] for block in decoded_car_fixture_blocks) == - set(block[0] for block in decoded_car_fixture_file) - ) - - # verify content by CID - encoded_blocks_dict = {block[0]: block[1] for block in decoded_car_fixture_blocks} - fixture_data_dict = {block[0]: block[1] for block in decoded_car_fixture_file} - - for cid, data in fixture_data_dict.items(): - assert cid in encoded_blocks_dict - assert encoded_blocks_dict[cid] == data