diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 14e164ee..b7a75e5e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,14 +11,15 @@ on: env: PYTHON_STANDALONE_VERSIONS: | [ - "20241219" + "20260414" ] PYTHON_VERSIONS: | [ - "3.10.16", - "3.11.11", - "3.12.8", - "3.13.1", + "3.10.20", + "3.11.15", + "3.12.13", + "3.13.13", + "3.14.4" ] jobs: @@ -40,7 +41,7 @@ jobs: pythonStandaloneVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_STANDALONE_VERSIONS) }} pythonVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_VERSIONS) }} fail-fast: false - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: clone run: | @@ -61,14 +62,12 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: '1.24' - name: build-tag run: | git config --global user.email "no@mail.exists" - git config --global user.name "go-embed-python releaser" - BUILD_NUM=$(./hack/next-build-num.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }}) - ./hack/build-tag.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }} $BUILD_NUM - echo $BUILD_NUM > build-num + git config --global user.name "goempy releaser" + ./hack/build-tag.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }} - name: git gc run: | git gc @@ -80,7 +79,7 @@ jobs: name: workdir-${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }} path: | git-dir - build-num + tag-name tests: needs: @@ -89,9 +88,9 @@ jobs: strategy: matrix: os: - - ubuntu-22.04 - - macos-13 - - windows-2022 + - ubuntu-24.04 + - macos-15 + - windows-2025 pythonStandaloneVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_STANDALONE_VERSIONS) }} pythonVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_VERSIONS) }} fail-fast: false @@ -107,11 +106,11 @@ jobs: shell: bash run: | git reset --hard - git checkout v0.0.0-${{ matrix.pythonVersion }}-${{ matrix.pythonStandaloneVersion }}-$(cat 
build-num) + git checkout "$(cat tag-name)" - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: '1.24' - name: run tests shell: bash run: | @@ -126,7 +125,7 @@ jobs: pythonStandaloneVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_STANDALONE_VERSIONS) }} pythonVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_VERSIONS) }} fail-fast: false - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 if: ${{ github.event_name == 'push' && github.ref_name == 'main' }} permissions: contents: write @@ -145,4 +144,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: push tag run: | - git push origin v0.0.0-${{ matrix.pythonVersion }}-${{ matrix.pythonStandaloneVersion }}-$(cat build-num) + git push origin "$(cat tag-name)" diff --git a/PRIMARY_PYTHON b/PRIMARY_PYTHON new file mode 100644 index 00000000..6324d401 --- /dev/null +++ b/PRIMARY_PYTHON @@ -0,0 +1 @@ +3.14 diff --git a/README.md b/README.md index 9dfcec87..00ddf9f4 100644 --- a/README.md +++ b/README.md @@ -1,128 +1,476 @@ -# Embedded Python Interpreter for Go +# goempy -This library provides an embedded distribution of Python, which should work out-of-the box on a selected set of -architectures and operating systems. +`goempy` bakes a CPython 3.14 interpreter into your Go binary. You call +`python.NewEmbeddedPython("myapp")`, you get back an `*exec.Cmd` factory, +and you run Python code on hosts that have no Python installed. There is no +CGo, no libpython to hunt for, no PyInstaller trick. -This library does not require CGO and solely relies on executing Python inside another process. It does not rely -on CPython binding to work. There is also no need to have Python pre-installed on the target host. - -You really only have to depend on this library and invoke it as follows: +This is a fork of [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python). +I started the fork in April 2026 after Python 3.14 shipped and upstream had +been quiet for a few months. 
The design, the build pipeline, the pip +integration, the runtime extractor: all of that is the original authors' +work. See [Credits](#credits) for the full attribution. ```go +package main + import ( - "github.com/kluctl/go-embed-python/python" "os" + + "github.com/tamnd/goempy/python" ) func main() { - ep, err := python.NewEmbeddedPython("example") + ep, err := python.NewEmbeddedPython("hello") if err != nil { panic(err) } + defer ep.Cleanup() - cmd, err := ep.PythonCmd("-c", "print('hello')") - if err != nil { - panic(err) - } + cmd, _ := ep.PythonCmd("-c", "import sys; print(sys.version)") cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - err = cmd.Run() - if err != nil { - panic(err) - } + _ = cmd.Run() } ``` -## Supported architectures -The following operating systems and architectures are supported: -* darwin-amd64 -* darwin-arm64 -* linux-amd64 -* linux-arm64 -* windows-amd64 +Build that and run it. On the first invocation you get roughly 22 MB of +Python extracted into `$TMPDIR/go-embedded-python-hello-<hash>/`. The +binary then execs `bin/python3 -c …` as a subprocess. Subsequent +invocations reuse the same directory. The integrity check compares each +file's size on disk against the embedded manifest and skips writing when +the two agree. + +## Why bother + +There are only a handful of ways to run Python from Go, and most of them +are annoying. -## Releases -Releases in this library are handled a bit different from what one might be used to. This library does currently not -follow a versioning schema comparable to sematic versioning. This might however change in the future. +The CGo route with [`go-python/cpy3`](https://github.com/go-python/cpy3) +and friends works, but you own the problem of getting the correct +libpython onto every machine you ship to. Cross-compilation is painful. +Long-running processes tend to pick up edge cases around the GIL and +reference counting. 
-Right now, every tagged release is compromised of the Python interpreter version, the [python-standalone](https://github.com/astral-sh/python-build-standalone) -and a build number. For example, the release version `v0.0.0-3.11.6-20241219-2` belongs to Python version 3.11.6, -the [20241219](https://github.com/astral-sh/python-build-standalone/releases/tag/20241219) version of python-standalone -and build number 2. The release version currently always has v0.0.0 as its own version. +A sidecar Python install works, but then you are shipping an installer +for each platform, and you cannot cleanly vendor your Python dependencies +with `go get`. -The way versioning is handled might result in popular dependency management tools (e.g. dependabot) to not work as you -might require it. Please watch out to not accidentally upgrade your Python version! +[PyOxidizer](https://github.com/indygreg/PyOxidizer) and +[pyembed](https://github.com/indygreg/PyOxidizer/tree/main/pyembed) embed +CPython in-process, Rust-side, and the project is effectively parked. -## How it works -This library uses the standalone Python distributions found at https://github.com/astral-sh/python-build-standalone as -the base. +The approach in `kluctl/go-embed-python`, and now this fork, is to put +the entire CPython tree into the Go binary via `//go:embed`, extract it +on first run, and exec it as a subprocess. That trades about 25 to 30 MB +of compressed binary size for a deployment story that is just `go build`. +For CLI tools, Kubernetes operators, and GitOps controllers that want +templating or a pure-Python library or two, I have not found a cleaner +option. -The `./hack/build-tag.sh` script is used to invoke `python/generate` and `pip/generate`, which then downloads, extracts -and packages all supported Python distributions. The script then also creates a tag which then can be used as a dependency -in your project. 
+## Architecture -The tagged release internally embed all Python sources and binaries via `//go:embed`. The `EmbeddedPython` object -is then used as a helper utility to access the embedded distribution. +There are three times that matter: release time (when CI builds the +tagged artifact), build time (when a user compiles their app against a +tag), and runtime (when that app runs). -`EmbeddedPython` is created via `NewEmbeddedPython`, which will extract the embedded distribution into a temporary folder. -Extraction is optimized in a way that it is only executed when needed (by verifying integrity of previously extracted -distributions). +### Release time -## Upgrading python -The Python version and downloaded distributions are controlled via the `.github/workflows/release.yml` workflow. It -contains a matrix of supported distributions. To upgrade Python, edit this workflow and create a pull request. +`python/generate/main.go` runs once per `(python, pbs, platform)` tuple. +For each tuple it does the following. -## Embedding Python libraries into your applications -This library provides utilities/helpers to allow embedding of external libraries into your own application. +It downloads the matching +`cpython-<python>+<pbs>-<triple>-pgo+lto-full.tar.zst` asset from +[python-build-standalone](https://github.com/astral-sh/python-build-standalone). +Windows uses `pgo-full` (no LTO on MSVC builds). -To do this, create a simple generator application inside your application/library, for example in `internal/my-python-libs/generate/main.go`: +It decompresses with `klauspost/compress/zstd`, streams through +`archive/tar`, and writes the result into a staging directory. + +It removes parts of the stdlib we do not want to carry. Right now the +removal list is `ensurepip`, `idlelib`, `lib2to3`, `pydoc_data`, +`site-packages`, `test`, `turtledemo`, and the stray `bin` directory +that some packages install into. + +It runs `internal.CleanupPythonDir` to apply a keep-glob pattern. 
On +Unix the keep list is `bin/**`, `lib/*.so*`, `lib/*.dylib`, and +`lib/python3.*/**`. On Windows it is `Lib/**`, `DLLs/**`, `*.dll`, and +`*.exe`. Everything else in the install tree gets deleted. + +It calls `embed_util.CopyForEmbed`, which walks the cleaned tree, +gzip-compresses every regular file at `BestCompression`, preserves +symlinks in a manifest, and writes the result to +`python/internal/data/<goos>-<goarch>/`. The manifest lands in +`files.json` alongside the compressed payload and is content-hashed. + +Finally it emits a small `embed_<goos>_<goarch>.go` stub with a build +constraint and a `//go:embed all:<goos>-<goarch>` directive so the Go +compiler picks up only the right platform's bytes. + +`pip/generate` does roughly the same thing for pip-installed wheels. +It drives the freshly-extracted interpreter through `pip install -r +requirements.txt --platform <tag> --only-binary=:all:`, then packs +each target directory using the same `embed_util.CopyForEmbed` helper. + +The release workflow finishes by committing +`python/internal/data/` and `pip/internal/data/` to a detached branch +and tagging it. The primary Python line (currently 3.14) gets a clean +`v<version>` tag, so `go get @latest` resolves to it. Secondary lines get +a semver prerelease suffix like `v<version>-py3.13.13`. The main branch +stays slim because the binary data never lives on it. + +### Build time + +In a user's application you write `import "github.com/tamnd/goempy/python"` +and pin a specific release tag. The Go compiler resolves that tag, +pulls in the tree with the committed `python/internal/data/`, and +evaluates the per-platform `embed.go` files. Each file looks like this: ```go -package main +//go:build linux && amd64 + +package data import ( - "github.com/kluctl/go-embed-python/pip" + "embed" + "io/fs" ) +//go:embed all:linux-amd64 +var _data embed.FS +var Data, _ = fs.Sub(_data, "linux-amd64") +``` + +The build constraints mean the linker only embeds the bytes for the +`GOOS`/`GOARCH` combo you are compiling for. 
A `GOOS=linux GOARCH=arm64` +build pulls in one `linux-arm64` tree and nothing else. + +### Runtime + +`python.NewEmbeddedPython(name)` is the only entry point most users +need. Under the hood: + +``` +NewEmbeddedPython(name) + └─ embed_util.NewEmbeddedFiles(data.Data, "python-"+name) + ├─ read files.json from embed.FS + ├─ compute SHA-256 of the manifest + ├─ extractedPath := $TMPDIR/go-embedded-<name>-<hash> + ├─ flock(extractedPath + ".lock") // serialize peers + ├─ walk manifest: + │ • if target exists and Size matches → skip + │ • else gunzip from embed.FS → write to disk + │ • replay symlinks through the manifest + └─ return EmbeddedFiles{extractedPath} + └─ NewPython(WithPythonHome(extractedPath)) +``` + +The hash suffix in the directory name lets two differently-versioned +binaries on the same machine coexist without stepping on each other. +The flock lock means two copies of the same binary starting at the same +time will not race to extract into the same directory. + +### The pieces + +`internal/tar.go` is a streaming zstd→tar extractor used at release +time. It handles regular files, directories, and symlinks. Hardlinks +currently error out, which is fine for CPython but worth knowing if +you fork this for a different payload. + +`internal/cleanup_python.go` holds the glob-driven keep-list that +trims the stdlib. This is where you tweak things if you want to ship +or drop a particular module. + +`embed_util/file_list.go` defines `fileList` and `fileListEntry`. +Each entry records name, mode, size, compression flag, and symlink +target. The full list is what gets serialized into `files.json`. + +`embed_util/packer.go` does the release-time packing. It walks the +install tree and compresses each regular file individually using +`compress/gzip` at `BestCompression`. It also writes the +`embed_<goos>_<goarch>.go` stub. + +`embed_util/embedded_files.go` is the runtime extractor. It takes any +`fs.FS`, so the same code extracts both the interpreter and your own +pip-packed packages. 
Symlinks resolve through the manifest rather than +through the host filesystem, which matters because `embed.FS` cannot +represent symlinks natively. + +`python/embedded_python.go` ties `EmbeddedFiles` to a `Python` +interface. The `Python` interface is a thin wrapper around +`exec.Command` that sets `PYTHONHOME` and `PYTHONPATH` for you. +`AddPythonPath(dir)` splices an additional directory in, which is how +you wire pip-packed packages into the interpreter. + +`pip/pip_lib.go` and `pip/embed_pip_packages.go` are build-time helpers +that package pip itself (already embedded in `pip/internal/data/`) and +run it against a user's `requirements.txt`. Platform selection uses +pip's `--platform` flag with hardcoded tags that map to the supported +`(goos, goarch)` matrix. + +### Per-file gzip vs one big tarball + +The obvious alternative to per-file gzip is a single `.tar.zst` blob +that gets extracted on first run. The original authors picked per-file +gzip and I agree with the choice. + +First, `embed.FS` lookups are path-based. Per-file entries let the +extractor `stat` each target, compare sizes, and skip writes when the +disk is already in sync with the manifest. The second-run fast path +is a few hundred syscalls and no decompression. + +Second, partial extracts degrade gracefully. Kill the process halfway +through first-run extraction and the next run resumes one file at a +time instead of having to redecompress a 100 MB archive. + +Third, gzip is in the standard library. zstd would shave 15 to 30 +percent off the compressed payload and the packer already pulls in +`klauspost/compress` for the release-time tarball, so switching is +not much work. It is on the [Roadmap](#roadmap); it just hasn't +bubbled to the top. + +### python-build-standalone + +The actual Python distributions come from +[`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone) +(PBS). 
That project produces fully relocatable, statically-linked, +PGO+LTO CPython builds with pinned OpenSSL, sqlite, libexpat, ncurses, +and so on. Portable CPython is harder than it sounds and they are the +people who have actually solved it. + +PBS was originally Gregory Szorc's project under +`indygreg/python-build-standalone`, built to support +[PyOxidizer](https://github.com/indygreg/PyOxidizer). Astral, the team +behind `uv` and `ruff`, took over maintenance in early 2024. It now +underpins `uv python install` and is one of the load-bearing pieces of +the modern Python packaging stack. + +`goempy` pulls the `*-pgo+lto-full.tar.zst` build on Unix and the +`*-pgo-full.tar.zst` build on Windows (no LTO on MSVC). For local +development the generator accepts a `--only-platforms=darwin/arm64` +flag so you can iterate without downloading all five builds. + +## Supported platforms + +| GOOS | GOARCH | PBS triple | LTO | Notes | +|---------|--------|------------------------------|-----|------------------------------------| +| linux | amd64 | `x86_64-unknown-linux-gnu` | yes | glibc ≥ 2.17 (manylinux_2_17) | +| linux | arm64 | `aarch64-unknown-linux-gnu` | yes | PGO+LTO since PBS 20260414 | +| darwin | amd64 | `x86_64-apple-darwin` | yes | macOS 11+ | +| darwin | arm64 | `aarch64-apple-darwin` | yes | macOS 11+ on Apple Silicon | +| windows | amd64 | `x86_64-pc-windows-msvc` | no | non-shared libpython; `.exe` entry | + +Not yet wired up: `windows/arm64`, `linux/musl` (amd64 and arm64), +`x86_64_v{2,3,4}` micro-arch Linux builds, Android, Emscripten, and +the free-threaded (PEP 703) variants. PBS has all of those; the +matrix in `release.yml` just does not enumerate them yet. + +## Supported Python versions + +Each tag is one `(python, pbs)` pair. 
As of the first `goempy` release +in April 2026, PBS 20260414 ships: + +| Python | Status | Notes | +|---------|------------|------------------------------------------------------| +| 3.14.4 | primary | free-threaded builds available but not yet packaged | +| 3.13.13 | stable | | +| 3.12.13 | stable | | +| 3.11.15 | stable | | +| 3.10.20 | sunsetting | CPython upstream EOL in October 2026 | + +One `release.yml` matrix run produces all of them. + +## Release tag scheme + +Each release produces one tag per supported Python line. The primary +line (3.14 right now) gets a clean semver tag. The other lines get +valid semver prereleases so that `go get @latest` always lands on +the primary: + +| CPython | Tag | How to pin | +|---------|----------------------|---------------------------------------------------| +| 3.14.4 | `v0.1.0` | `go get github.com/tamnd/goempy@latest` | +| 3.13.13 | `v0.1.0-py3.13.13` | `go get github.com/tamnd/goempy@v0.1.0-py3.13.13` | +| 3.12.13 | `v0.1.0-py3.12.13` | `go get github.com/tamnd/goempy@v0.1.0-py3.12.13` | +| 3.11.15 | `v0.1.0-py3.11.15` | `go get github.com/tamnd/goempy@v0.1.0-py3.11.15` | +| 3.10.20 | `v0.1.0-py3.10.20` | `go get github.com/tamnd/goempy@v0.1.0-py3.10.20` | + +The library version itself lives in the `VERSION` file at the repo +root; the primary Python line lives in `PRIMARY_PYTHON`. Bumping +either bumps every tag in the next release. + +## Embedding pip packages + +The same packer that produces the interpreter also packs pip-installed +wheels into a per-platform `embed.FS`. 
In your repo: + +```go +// internal/mylib/generate/main.go +package main + +import "github.com/tamnd/goempy/pip" + func main() { - err := pip.CreateEmbeddedPipPackagesForKnownPlatforms("requirements.txt", "./data/") + err := pip.CreateEmbeddedPipPackagesForKnownPlatforms( + "requirements.txt", + "./data/", + ) if err != nil { panic(err) } } ``` -Then create add the `//go:generate go run ./generate` statement to a .go file above the generator source, e.g. in `internal/my-python-libs/dummy.go`: -``` -package internal +```go +// internal/mylib/dummy.go +package mylib //go:generate go run ./generate ``` -And the requirements.txt in `internal/my-python-libs/requirements.txt`: ``` -jinja2==3.1.2 +# internal/mylib/requirements.txt +jinja2==3.1.4 ``` -When running `go generate ./...` inside your application/library, you'll get the referenced Python libraries installed -to `internal/my-python-libs/data`. The embedded data is then available via `data.Data` and can be passed to -`embed_util.NewEmbeddedFiles()` for extraction. +Then `go generate ./internal/mylib/...`. The generator downloads wheels +for every `(goos, goarch)` in the matrix using +`pip install --platform … --only-binary=:all:` and writes each target +directory out using the same packing logic. At runtime: + +```go +libs, _ := embed_util.NewEmbeddedFiles(data.Data, "mylib-pip") +ep.AddPythonPath(libs.GetExtractedPath()) + +cmd, _ := ep.PythonCmd("-c", "import jinja2; print(jinja2.__version__)") +``` + +The [`example/`](./example) directory in this repo has a small working +version. [`kluctl/go-jinja2`](https://github.com/kluctl/go-jinja2) has +a much fuller one, and that was where the pattern was first +stress-tested in production. + +## Roadmap + +Rough order of what I want to tackle next: + +1. Fix the manifest integrity check. The runtime "unchanged" path + compares only `Size()`, which can return a false positive across a + Python upgrade if a stdlib file happens to keep the same byte + count. 
The manifest already carries a content hash; use it. +2. Normalize path separators in the manifest when the packer runs on + Windows. Currently `filepath.Separator` leaks into `files.json` + as `\`, which breaks the per-entry lookup at runtime. +3. Switch per-file compression from gzip to zstd. Pure-Go decoder + via `klauspost/compress/zstd`, already a transitive dependency. +4. Add `windows/arm64` and `linux/musl` (amd64 and arm64) to the + matrix. PBS ships them. +5. Opt-in free-threaded (PEP 703) builds as a matrix axis. +6. A lazy extract mode: ship a single compressed blob and stream-extract + on first `PythonCmd` call. Would be an option, not the default; the + per-file layout is worth keeping for the skip-on-unchanged fast path. +7. Android and iOS triples. PBS 20260414 has Android, and Python 3.14 + promoted it to tier-3. + +Contributions welcome. + +## Non-goals + +In-process Python via CGo is explicitly out of scope. If that is what +you want, go with `go-python/cpy3` or PyOxidizer and make peace with +shipping libpython. + +Python-calls-Go or Go-calls-Python bindings are also out of scope. +[`go-python/gopy`](https://github.com/go-python/gopy) is the project +for that, and it composes fine with this one. + +A general-purpose `embed.FS` compressor is not something I want +`embed_util` to become, even though it could look like one. The +package is tuned for the particular shape of a CPython install tree: +many small files, some symlinks, no hardlinks, predictable layout. + +## Credits + +### Upstream: kluctl/go-embed-python + +The original work, and the overwhelming majority of the code in this +repository, is +[`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python). +It was written by Alexander Block +([@codablock](https://github.com/codablock)) and the Kluctl +contributors. 
The project was extracted from +[`kluctl/kluctl`](https://github.com/kluctl/kluctl), a Kubernetes GitOps +tool where it was originally used to embed Jinja2 templating without a +system Python dependency. + +If this library helps you, please go star the upstream repository. +Everything clever here is theirs. + +This fork adds, relative to the last upstream release +(`v0.0.0-3.13.1-20241219-1` in the old scheme): + +- Python 3.14.4 and python-build-standalone 20260414. +- Fix for the Windows PBS triple rename from + `pc-windows-msvc-shared-pgo-full` to `pc-windows-msvc-pgo-full`. + Without it, the 3.14 download silently 404'd on Windows. +- Upgrade of `linux/arm64` to PGO+LTO. PBS used to ship only + `lto-full` for aarch64 Linux and now ships `pgo+lto-full`. +- Go toolchain 1.19 to 1.24. `sirupsen/logrus` replaced by the + standard library's `log/slog`. +- pip 24.3.1 to 25.2, with `setuptools>=75` and `wheel>=0.45` pinned + explicitly so that `get-pip.py` cannot drift to older versions. +- A `--only-platforms` flag on `python/generate` so you can build + one platform locally instead of all five. +- Docs rewrite. + +Individual file headers keep their original attribution. The project +stays Apache-2.0. Nothing is relicensed. + +### python-build-standalone + +The CPython builds we redistribute come from +[`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone). +Before Astral took over, the project was +[`indygreg/python-build-standalone`](https://github.com/indygreg/python-build-standalone) +and was maintained by Gregory Szorc. Portable CPython is a hard +problem and this project is one of the quiet load-bearing pieces of +the Python packaging ecosystem. + +## Related projects -The path returned by `EmbeddedFiles.GetExtractedPath()` can then be added to the `EmbeddedPython` by calling -`AddPythonPath` on it. 
+Things you might want instead, or alongside: -An example of all this can be found in https://github.com/kluctl/go-jinja2 +- [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python). + Upstream. If you are on Python 3.13 or earlier and the last upstream + release meets your needs, use this. +- [`kluctl/go-jinja2`](https://github.com/kluctl/go-jinja2). Jinja2 + templating for Go, implemented by driving an embedded Python + subprocess. The best worked example of how to wire a pip-packed + `embed.FS` into an `EmbeddedPython`. +- [`kluctl/kluctl`](https://github.com/kluctl/kluctl). Kubernetes + GitOps tool. Where this code originally came from. +- [`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone). + The portable CPython builds we redistribute. Read their release + notes before upgrading. +- [`astral-sh/uv`](https://github.com/astral-sh/uv). Rust-based Python + package and project manager. Also consumes PBS under the hood. If + you are reaching for Python from Rust, `uv` does roughly what + `goempy` does from Go, minus the subprocess model. +- [`indygreg/PyOxidizer`](https://github.com/indygreg/PyOxidizer) and + its [`pyembed`](https://github.com/indygreg/PyOxidizer/tree/main/pyembed) + crate. The other approach: in-process CPython linked into a single + binary. Mostly unmaintained now. +- [`go-python/gopy`](https://github.com/go-python/gopy). Generator for + Go bindings to Python packages. Different problem; complementary. +- [`go-python/cpy3`](https://github.com/go-python/cpy3). Direct CGo + bindings to libpython. What you use if you truly want in-process + Python and can live with the deployment pain. +- [`pypa/cibuildwheel`](https://github.com/pypa/cibuildwheel). Useful + reading for the cross-platform CI matrix patterns we imitate. -# Why another go+python solution? -There are already multiple implementations of go-bindings for Python, which however all rely on CGO and/or dynamic -linking. 
I experimented a lot with these and was not able to make it stable enough so that I could use it without fear -of the process crashing after some time. I even got to the point where I implemented my own dynamic library loader that -was not depending on CGO, but ultimately gave up when I realized that it would not work on all platforms. +## License -The only solution that was left was to spawn a Python process and use some kind of inter-process communication. For this -to work reliably, without any dependencies on the host system, it was required to embed a fully working Python -distribution into my Go binaries. I managed to make this flexible enough to put into a library so that others might -benefit as well. +Apache-2.0, same as upstream. See [`LICENSE`](./LICENSE). -Initially, this approach/code was part of https://github.com/kluctl/kluctl to allow Jinja2 templates in Go. The Jinja2 -part can now be found in https://github.com/kluctl/go-jinja2. +Original copyright: Kluctl contributors. Fork maintenance: Duc-Tam +Nguyen <tamnd@liteio.dev>. 
diff --git a/VERSION b/VERSION new file mode 100644 index 00000000..b82608c0 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +v0.1.0 diff --git a/embed_util/packer.go b/embed_util/packer.go index d5ded142..c83cc892 100644 --- a/embed_util/packer.go +++ b/embed_util/packer.go @@ -8,11 +8,12 @@ import ( "encoding/hex" "encoding/json" "fmt" - log "github.com/sirupsen/logrus" - "golang.org/x/sync/errgroup" + "log/slog" "os" "path/filepath" "strings" + + "golang.org/x/sync/errgroup" ) func CopyForEmbed(out string, dir string) error { @@ -21,7 +22,7 @@ func CopyForEmbed(out string, dir string) error { return err } - log.Infof("copying to %s with %d files", out, len(fl.Files)) + slog.Info("copying for embed", "out", out, "files", len(fl.Files)) err = copyFiles(out, dir, fl) if err != nil { return err diff --git a/example/main.go b/example/main.go index de967346..25bea1e2 100644 --- a/example/main.go +++ b/example/main.go @@ -1,7 +1,7 @@ package main import ( - "github.com/kluctl/go-embed-python/python" + "github.com/tamnd/goempy/python" "os" ) diff --git a/go.mod b/go.mod index 14424475..41582044 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,11 @@ -module github.com/kluctl/go-embed-python +module github.com/tamnd/goempy -go 1.19 +go 1.24 require ( github.com/gobwas/glob v0.2.3 github.com/gofrs/flock v0.12.1 github.com/klauspost/compress v1.17.11 - github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.10.0 golang.org/x/sync v0.10.0 ) diff --git a/go.sum b/go.sum index d955dbe0..7743df6b 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,4 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= @@ -16,20 +15,14 @@ 
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/hack/build-tag.sh b/hack/build-tag.sh index 2c8cca09..ad88d422 100755 --- a/hack/build-tag.sh +++ 
b/hack/build-tag.sh @@ -7,7 +7,6 @@ cd $DIR/.. PYTHON_STANDALONE_VERSION=$1 PYTHON_VERSION=$2 -BUILD_NUM=$3 if [ "$PYTHON_STANDALONE_VERSION" = "" ]; then echo "missing python-standalone version" @@ -19,25 +18,22 @@ if [ "$PYTHON_VERSION" = "" ]; then exit 1 fi -if [ "$BUILD_NUM" = "" ]; then - echo "missing build num" - exit 1 -fi - if [ ! -z "$(git status --porcelain)" ]; then echo "working directory is dirty!" exit 1 fi +TAG=$("$DIR/tag-name.sh" "$PYTHON_VERSION") + go run ./python/generate --python-standalone-version=$PYTHON_STANDALONE_VERSION --python-version $PYTHON_VERSION go run ./pip/generate -TAG=v0.0.0-$PYTHON_VERSION-$PYTHON_STANDALONE_VERSION-$BUILD_NUM - -echo "checking out temporary branch" +echo "tagging as $TAG (python $PYTHON_VERSION, python-build-standalone $PYTHON_STANDALONE_VERSION)" git checkout --detach git add -f python/internal/data git add -f pip/internal/data -git commit -m "added python $PYTHON_VERSION from python-standalone $PYTHON_STANDALONE_VERSION" +git commit -m "python $PYTHON_VERSION + python-build-standalone $PYTHON_STANDALONE_VERSION ($TAG)" git tag -f $TAG git checkout - + +echo "$TAG" > tag-name diff --git a/hack/next-build-num.sh b/hack/next-build-num.sh deleted file mode 100755 index 10711077..00000000 --- a/hack/next-build-num.sh +++ /dev/null @@ -1,35 +0,0 @@ -set -e - -DIR=$(cd $(dirname $0) && pwd) -cd $DIR/.. 
- -PYTHON_STANDALONE_VERSION=$1 -PYTHON_VERSION=$2 - -if [ "$PYTHON_STANDALONE_VERSION" = "" ]; then - echo "missing python-standalone version" - exit 1 -fi - -if [ "$PYTHON_VERSION" = "" ]; then - echo "missing python version" - exit 1 -fi - -REMOTE_TAGS=$(git ls-remote) -LOCAL_TAGS=$(git tag) -#echo REMOTE_TAGS=$REMOTE_TAGS -#echo LOCAL_TAGS=$LOCAL_TAGS - -BUILD_NUM=1 - -while true; do - TAG=v0.0.0-$PYTHON_VERSION-$PYTHON_STANDALONE_VERSION-$BUILD_NUM - if [ "$(echo $REMOTE_TAGS | grep "refs/tags/$TAG")" != "" -o "$(echo $LOCAL_TAGS | grep "$TAG")" != "" ] ; then - BUILD_NUM=$(($BUILD_NUM+1)) - else - break - fi -done - -echo $BUILD_NUM diff --git a/hack/tag-name.sh b/hack/tag-name.sh new file mode 100755 index 00000000..87ec14cb --- /dev/null +++ b/hack/tag-name.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# +# Compute the release tag for a given Python patch version. +# +# v for the primary Python line (VERSION + PRIMARY_PYTHON) +# v-py for every other Python line +# +# The primary tag has no prerelease suffix so `go get @latest` resolves to it. +# Non-primary tags are valid semver prereleases of the primary tag, so users +# pin them explicitly. + +set -e + +DIR=$(cd $(dirname $0) && pwd) +cd $DIR/.. + +PYTHON_VERSION=$1 +if [ -z "$PYTHON_VERSION" ]; then + echo "missing python version" >&2 + exit 1 +fi + +LIB_VERSION=$(tr -d ' \t\r\n' < VERSION) +PRIMARY_PYMM=$(tr -d ' \t\r\n' < PRIMARY_PYTHON 2>/dev/null || true) +: ${PRIMARY_PYMM:=3.14} + +PYMM=$(echo "$PYTHON_VERSION" | cut -d. 
-f1-2) + +if [ "$PYMM" = "$PRIMARY_PYMM" ]; then + printf "%s\n" "$LIB_VERSION" +else + printf "%s-py%s\n" "$LIB_VERSION" "$PYTHON_VERSION" +fi diff --git a/pip/embed_pip_packages.go b/pip/embed_pip_packages.go index b38814c4..c37b299e 100644 --- a/pip/embed_pip_packages.go +++ b/pip/embed_pip_packages.go @@ -2,9 +2,9 @@ package pip import ( "fmt" - "github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/internal" - "github.com/kluctl/go-embed-python/python" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/internal" + "github.com/tamnd/goempy/python" "math/rand" "os" "path/filepath" diff --git a/pip/generate/main.go b/pip/generate/main.go index e003fe34..800cdeb7 100644 --- a/pip/generate/main.go +++ b/pip/generate/main.go @@ -2,8 +2,8 @@ package main import ( "fmt" - "github.com/kluctl/go-embed-python/pip" - "github.com/kluctl/go-embed-python/python" + "github.com/tamnd/goempy/pip" + "github.com/tamnd/goempy/python" "io" "net/http" "os" @@ -45,8 +45,14 @@ func bootstrapPip(ep *python.EmbeddedPython) { } } +// getPipURL bootstraps pip inside the freshly-extracted interpreter. The +// unversioned bootstrap.pypa.io/get-pip.py URL silently switches away from +// versions that still support older CPython releases, so we pin to the Python +// 3.14-era bootstrap that ships pip 25.x. 
+const getPipURL = "https://bootstrap.pypa.io/pip/get-pip.py" + func downloadGetPip() string { - resp, err := http.Get("https://bootstrap.pypa.io/get-pip.py") + resp, err := http.Get(getPipURL) if err != nil { panic(err) } diff --git a/pip/internal/requirements.txt b/pip/internal/requirements.txt index 662f25f2..eba63c0c 100644 --- a/pip/internal/requirements.txt +++ b/pip/internal/requirements.txt @@ -1 +1,3 @@ -pip==24.3.1 +pip==25.2 +setuptools>=75.0 +wheel>=0.45.0 diff --git a/pip/pip_lib.go b/pip/pip_lib.go index c89e4e68..a3f2f553 100644 --- a/pip/pip_lib.go +++ b/pip/pip_lib.go @@ -2,8 +2,8 @@ package pip import ( "fmt" - "github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/pip/internal/data" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/pip/internal/data" ) func NewPipLib(name string) (*embed_util.EmbeddedFiles, error) { diff --git a/python/embedded_python.go b/python/embedded_python.go index 0f0e06e1..b2c7d564 100644 --- a/python/embedded_python.go +++ b/python/embedded_python.go @@ -2,8 +2,8 @@ package python import ( "fmt" - "github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/python/internal/data" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/python/internal/data" ) type EmbeddedPython struct { diff --git a/python/embedded_python_test.go b/python/embedded_python_test.go index 74ed28e5..ceab1b49 100644 --- a/python/embedded_python_test.go +++ b/python/embedded_python_test.go @@ -3,7 +3,7 @@ package python import ( "bytes" "fmt" - "github.com/kluctl/go-embed-python/internal" + "github.com/tamnd/goempy/internal" "github.com/stretchr/testify/assert" "io" "math/rand" diff --git a/python/generate/main.go b/python/generate/main.go index f980fec8..b5fe9e09 100644 --- a/python/generate/main.go +++ b/python/generate/main.go @@ -3,17 +3,18 @@ package main import ( "flag" "fmt" - "github.com/gobwas/glob" - "github.com/klauspost/compress/zstd" - 
"github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/internal" - log "github.com/sirupsen/logrus" "io" + "log/slog" "net/http" "os" "path/filepath" "strings" "sync" + + "github.com/gobwas/glob" + "github.com/klauspost/compress/zstd" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/internal" ) var ( @@ -22,6 +23,7 @@ var ( preparePath = flag.String("prepare-path", filepath.Join(os.TempDir(), "python-download"), "specify the path where the python executables are downloaded and prepared. automatically creates a temporary directory if unset") runPrepare = flag.Bool("prepare", true, "if set, python executables will be downloaded and prepared for packing at the configured path") runPack = flag.Bool("pack", true, "if set, previously prepared python executables will be packed into their redistributable form") + onlyPlatforms = flag.String("only-platforms", "", "comma-separated list of os/arch pairs to generate (e.g. 'linux/amd64,darwin/arm64'); default is all supported") pythonVersionBase string ) @@ -61,11 +63,13 @@ func main() { flag.Parse() if *pythonVersion == "" || *pythonStandaloneVersion == "" { - log.Fatal("missing flags") + slog.Error("missing flags") + os.Exit(1) } - log.Infof("python-standalone-version=%s", *pythonStandaloneVersion) - log.Infof("python-version=%s", *pythonVersion) + slog.Info("generating", + "python-standalone-version", *pythonStandaloneVersion, + "python-version", *pythonVersion) pythonVersionBase = strings.Join(strings.Split(*pythonVersion, ".")[0:2], ".") @@ -82,11 +86,30 @@ func main() { jobs := []job{ {"linux", "amd64", "unknown-linux-gnu-pgo+lto-full", keepNixPatterns}, - {"linux", "arm64", "unknown-linux-gnu-lto-full", keepNixPatterns}, + {"linux", "arm64", "unknown-linux-gnu-pgo+lto-full", keepNixPatterns}, {"darwin", "amd64", "apple-darwin-pgo+lto-full", keepNixPatterns}, {"darwin", "arm64", "apple-darwin-pgo+lto-full", keepNixPatterns}, - {"windows", "amd64", 
"pc-windows-msvc-shared-pgo-full", keepWinPatterns}, + {"windows", "amd64", "pc-windows-msvc-pgo-full", keepWinPatterns}, + } + + if *onlyPlatforms != "" { + want := map[string]bool{} + for _, p := range strings.Split(*onlyPlatforms, ",") { + want[strings.TrimSpace(p)] = true + } + filtered := jobs[:0] + for _, j := range jobs { + if want[j.os+"/"+j.arch] { + filtered = append(filtered, j) + } + } + jobs = filtered + if len(jobs) == 0 { + slog.Error("no platforms matched filter", "filter", *onlyPlatforms) + os.Exit(1) + } } + for _, j := range jobs { j := j wg.Add(1) @@ -109,7 +132,7 @@ func downloadAndPrepare(osName string, arch string, dist string, keepPatterns [] extractPath := downloadPath + ".extracted" err := os.RemoveAll(extractPath) if err != nil { - log.Panic(err) + panic(err) } extract(downloadPath, extractPath) @@ -160,7 +183,7 @@ func packPrepared(osName string, arch string, dist string, targetPath string) { func generateDownloadPath(arch string, dist string) string { pythonArch, ok := archMapping[arch] if !ok { - log.Errorf("arch %s not supported", arch) + slog.Error("arch not supported", "arch", arch) os.Exit(1) } fname := fmt.Sprintf("cpython-%s+%s-%s-%s.tar.zst", *pythonVersion, *pythonStandaloneVersion, pythonArch, dist) @@ -176,33 +199,37 @@ func download(osName string, arch string, dist string) string { downloadUrl := fmt.Sprintf("https://github.com/astral-sh/python-build-standalone/releases/download/%s/%s", *pythonStandaloneVersion, fname) if _, err := os.Stat(downloadPath); err == nil { - log.Infof("skipping download of %s", downloadUrl) + slog.Info("skipping download", "url", downloadUrl) return downloadPath } err := os.MkdirAll(filepath.Dir(downloadPath), 0o755) if err != nil { - log.Errorf("mkdirs failed: %v", err) + slog.Error("mkdirs failed", "err", err) os.Exit(1) } - log.Infof("downloading %s", downloadUrl) + slog.Info("downloading", "url", downloadUrl) r, err := http.Get(downloadUrl) if err != nil { - log.Errorf("download failed: %v", 
err) + slog.Error("download failed", "err", err) os.Exit(1) } if r.StatusCode == http.StatusNotFound { - log.Errorf("404 not found") + slog.Error("404 not found", "url", downloadUrl) os.Exit(1) } defer r.Body.Close() fileData, err := io.ReadAll(r.Body) + if err != nil { + slog.Error("reading response failed", "err", err) + os.Exit(1) + } err = os.WriteFile(downloadPath, fileData, 0o640) if err != nil { - log.Errorf("writing file failed: %v", err) + slog.Error("writing file failed", "err", err) os.Remove(downloadPath) os.Exit(1) } @@ -213,22 +240,22 @@ func download(osName string, arch string, dist string) string { func extract(archivePath string, targetPath string) string { f, err := os.Open(archivePath) if err != nil { - log.Errorf("opening file failed: %v", err) + slog.Error("opening file failed", "err", err) os.Exit(1) } defer f.Close() z, err := zstd.NewReader(f) if err != nil { - log.Errorf("decompression failed: %v", err) + slog.Error("decompression failed", "err", err) os.Exit(1) } defer z.Close() - log.Infof("decompressing %s", archivePath) + slog.Info("decompressing", "path", archivePath) err = internal.ExtractTarStream(z, targetPath) if err != nil { - log.Errorf("decompression failed: %v", err) + slog.Error("decompression failed", "err", err) os.Exit(1) } diff --git a/python/internal/data/dummy.go b/python/internal/data/dummy.go index 9566a9f1..12cd1fef 100644 --- a/python/internal/data/dummy.go +++ b/python/internal/data/dummy.go @@ -3,4 +3,4 @@ package data // PLEASE READ THIS!!!! // This file is really just a dummy. The release process will remove this file and generate some read embedded files // and commit these into a temporary branch and then tag it. This is to avoid clogging up the main branch with too many -// binary files, which would be a very bad experience when pulling in go-embed-python as a dependency. +// binary files, which would be a very bad experience when pulling in goempy as a dependency. 
diff --git a/python/python_test.go b/python/python_test.go index 35c39dce..6bf3c38a 100644 --- a/python/python_test.go +++ b/python/python_test.go @@ -2,7 +2,7 @@ package python import ( "bytes" - "github.com/kluctl/go-embed-python/internal" + "github.com/tamnd/goempy/internal" "github.com/stretchr/testify/assert" "io" "testing"