From 75e2524a632fee85a345e4622fbb214791a2ee59 Mon Sep 17 00:00:00 2001 From: Duc-Tam Nguyen <1218621+tamnd@users.noreply.github.com> Date: Mon, 20 Apr 2026 18:54:37 +0700 Subject: [PATCH 1/5] Modernize for Python 3.14.4 and rename to goempy - Module path github.com/kluctl/go-embed-python -> github.com/tamnd/goempy - CI matrix: Python 3.10.17 / 3.11.13 / 3.12.11 / 3.13.3 / 3.14.4 - python-build-standalone 20241219 -> 20260414 - Fix Windows dist triple: pc-windows-msvc-shared-pgo-full -> pc-windows-msvc-pgo-full (upstream PBS dropped the shared- infix; unfixed the download 404s on 3.14) - Upgrade linux/arm64 to pgo+lto (previously lto-only in PBS) - Go toolchain 1.19 -> 1.24; drop sirupsen/logrus in favor of stdlib log/slog - pip 24.3.1 -> 25.2; pin setuptools>=75 / wheel>=0.45 - python/generate: --only-platforms flag for scoped local builds - CI runners: ubuntu-22.04 -> ubuntu-24.04, macos-13 -> macos-15, windows-2022 -> windows-2025 - README rewrite with supported-platform table, fork rationale, Python 3.14 focus Smoke-tested locally on darwin/arm64: Python 3.14.4, OpenSSL 3.5.6, go test ./... green. --- .github/workflows/release.yml | 27 ++++--- README.md | 143 ++++++++++++++++----------------- embed_util/packer.go | 7 +- example/main.go | 2 +- go.mod | 5 +- go.sum | 7 -- pip/embed_pip_packages.go | 6 +- pip/generate/main.go | 12 ++- pip/internal/requirements.txt | 4 +- pip/pip_lib.go | 4 +- python/embedded_python.go | 4 +- python/embedded_python_test.go | 2 +- python/generate/main.go | 71 +++++++++++----- python/internal/data/dummy.go | 2 +- python/python_test.go | 2 +- 15 files changed, 162 insertions(+), 136 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 14e164ee..f02ca842 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,14 +11,15 @@ on: env: PYTHON_STANDALONE_VERSIONS: | [ - "20241219" + "20260414" ] PYTHON_VERSIONS: | [ - "3.10.16", - "3.11.11", - "3.12.8", - "3.13.1", + "3.10.17", + "3.11.13", + "3.12.11", + "3.13.3", + "3.14.4" ] jobs: @@ -40,7 +41,7 @@ jobs: pythonStandaloneVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_STANDALONE_VERSIONS) }} pythonVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_VERSIONS) }} fail-fast: false - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: clone run: | @@ -61,11 +62,11 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: '1.24' - name: build-tag run: | git config --global user.email "no@mail.exists" - git config --global user.name "go-embed-python releaser" + git config --global user.name "goempy releaser" BUILD_NUM=$(./hack/next-build-num.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }}) ./hack/build-tag.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }} $BUILD_NUM echo $BUILD_NUM > build-num @@ -89,9 +90,9 @@ jobs: strategy: matrix: os: - - ubuntu-22.04 - - macos-13 - - windows-2022 + - ubuntu-24.04 + - macos-15 + - windows-2025 pythonStandaloneVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_STANDALONE_VERSIONS) }} pythonVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_VERSIONS) }} fail-fast: false @@ -111,7 +112,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: '1.24' - name: run tests shell: bash run: | @@ -126,7 +127,7 @@ jobs: pythonStandaloneVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_STANDALONE_VERSIONS) }} pythonVersion: ${{ fromJSON(needs.build-matrix.outputs.PYTHON_VERSIONS) }} fail-fast: false - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 if: ${{ github.event_name == 'push' && github.ref_name == 'main' }} permissions: contents: write diff --git a/README.md b/README.md index 9dfcec87..0dd22188 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,18 @@ -# Embedded Python Interpreter for Go +# goempy — Embedded Python 3.14 for Go -This library provides an embedded distribution of Python, which should work out-of-the box on a selected set of -architectures and operating systems. +`goempy` ships a ready-to-run CPython interpreter inside your Go binary. No +CGo, no system Python, no external runtime — just `go get`, embed, and exec. -This library does not require CGO and solely relies on executing Python inside another process. It does not rely -on CPython binding to work. There is also no need to have Python pre-installed on the target host. +It is a modernized fork of [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python) +tracking Python 3.14 and the Astral [`python-build-standalone`](https://github.com/astral-sh/python-build-standalone) +releases. -You really only have to depend on this library and invoke it as follows: +## Quick start ```go import ( - "github.com/kluctl/go-embed-python/python" "os" + "github.com/tamnd/goempy/python" ) func main() { @@ -19,70 +20,63 @@ func main() { if err != nil { panic(err) } + defer ep.Cleanup() - cmd, err := ep.PythonCmd("-c", "print('hello')") + cmd, err := ep.PythonCmd("-c", "print('hello from embedded python')") if err != nil { panic(err) } cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - err = cmd.Run() - if err != nil { - panic(err) - } + _ = cmd.Run() } ``` -## Supported architectures -The following operating systems and architectures are supported: -* darwin-amd64 -* darwin-arm64 -* linux-amd64 -* linux-arm64 -* windows-amd64 +## Supported platforms -## Releases -Releases in this library are handled a bit different from what one might be used to. This library does currently not -follow a versioning schema comparable to sematic versioning. This might however change in the future. +| OS | Arch | PBS triple | +|---------|-------|-------------------------------------| +| linux | amd64 | `x86_64-unknown-linux-gnu` | +| linux | arm64 | `aarch64-unknown-linux-gnu` | +| darwin | amd64 | `x86_64-apple-darwin` | +| darwin | arm64 | `aarch64-apple-darwin` | +| windows | amd64 | `x86_64-pc-windows-msvc` (non-shared) | -Right now, every tagged release is compromised of the Python interpreter version, the [python-standalone](https://github.com/astral-sh/python-build-standalone) -and a build number. For example, the release version `v0.0.0-3.11.6-20241219-2` belongs to Python version 3.11.6, -the [20241219](https://github.com/astral-sh/python-build-standalone/releases/tag/20241219) version of python-standalone -and build number 2. The release version currently always has v0.0.0 as its own version. +Planned: `windows/arm64`, `linux/musl`, free-threaded (PEP 703) builds. -The way versioning is handled might result in popular dependency management tools (e.g. dependabot) to not work as you -might require it. Please watch out to not accidentally upgrade your Python version! +## Supported Python versions -## How it works -This library uses the standalone Python distributions found at https://github.com/astral-sh/python-build-standalone as -the base. +Primary: **3.14.4**. Also shipped: 3.13.3, 3.12.11, 3.11.13, 3.10.17. -The `./hack/build-tag.sh` script is used to invoke `python/generate` and `pip/generate`, which then downloads, extracts -and packages all supported Python distributions. The script then also creates a tag which then can be used as a dependency -in your project. +## Releases + +Tag format: `v0.0.0---`, e.g. `v0.0.0-3.14.4-20260414-1`. +The `v0.0.0` prefix is intentional — this library does not follow semver. +The meaningful identifier is the `-` pair. Pin exactly. -The tagged release internally embed all Python sources and binaries via `//go:embed`. The `EmbeddedPython` object -is then used as a helper utility to access the embedded distribution. +> Dependabot and similar tools may mis-resolve upgrades against this scheme. +> Review Python version bumps manually. -`EmbeddedPython` is created via `NewEmbeddedPython`, which will extract the embedded distribution into a temporary folder. -Extraction is optimized in a way that it is only executed when needed (by verifying integrity of previously extracted -distributions). +## How it works -## Upgrading python -The Python version and downloaded distributions are controlled via the `.github/workflows/release.yml` workflow. It -contains a matrix of supported distributions. To upgrade Python, edit this workflow and create a pull request. +1. At release time, `python/generate` downloads each `(python, pbs, platform)` + triple from python-build-standalone, strips unused stdlib, and writes a + per-platform directory to `python/internal/data/-/`. +2. `embed_util.CopyForEmbed` compresses each file (gzip) and emits + `//go:embed` targets with build constraints so only the host's bytes are + linked into the final binary. +3. At runtime, `python.NewEmbeddedPython` extracts its `embed.FS` into + `$TMPDIR/go-embedded--`, guarded by `flock`, and returns an + `exec.Cmd` factory. -## Embedding Python libraries into your applications -This library provides utilities/helpers to allow embedding of external libraries into your own application. +## Embedding Python libraries -To do this, create a simple generator application inside your application/library, for example in `internal/my-python-libs/generate/main.go`: +Create `internal/mylib/generate/main.go`: ```go package main -import ( - "github.com/kluctl/go-embed-python/pip" -) +import "github.com/tamnd/goempy/pip" func main() { err := pip.CreateEmbeddedPipPackagesForKnownPlatforms("requirements.txt", "./data/") @@ -92,37 +86,40 @@ func main() { } ``` -Then create add the `//go:generate go run ./generate` statement to a .go file above the generator source, e.g. in `internal/my-python-libs/dummy.go`: -``` -package internal +Add `//go:generate go run ./generate` and a `requirements.txt` next to it, then +`go generate ./...`. The generated `data.Data` `embed.FS` is passed to +`embed_util.NewEmbeddedFiles()` and wired into the interpreter via +`AddPythonPath`. -//go:generate go run ./generate -``` +A working example lives in [`example/`](./example). -And the requirements.txt in `internal/my-python-libs/requirements.txt`: -``` -jinja2==3.1.2 +## Upgrading Python + +Edit `.github/workflows/release.yml`: + +```yaml +PYTHON_STANDALONE_VERSIONS: ["20260414"] +PYTHON_VERSIONS: ["3.10.17", "3.11.13", "3.12.11", "3.13.3", "3.14.4"] ``` -When running `go generate ./...` inside your application/library, you'll get the referenced Python libraries installed -to `internal/my-python-libs/data`. The embedded data is then available via `data.Data` and can be passed to -`embed_util.NewEmbeddedFiles()` for extraction. +Open a PR — CI will build the full matrix and tag on merge. + +## Why fork? + +Upstream `kluctl/go-embed-python` has been largely dormant since early 2025. +The Python 3.14 upgrade PR has sat open since February. `goempy` picks up: -The path returned by `EmbeddedFiles.GetExtractedPath()` can then be added to the `EmbeddedPython` by calling -`AddPythonPath` on it. +- Python 3.14.4 + python-build-standalone `20260414` +- Windows dist name fix (PBS dropped the `shared-` infix) +- Go 1.24 toolchain, `log/slog` (drop `logrus`) +- pip 25.2, pinned `get-pip.py` -An example of all this can be found in https://github.com/kluctl/go-jinja2 +See [spec 0967](../../notes/Spec/0900/0967_go_embed_python.md) for the upgrade +rationale and roadmap. -# Why another go+python solution? -There are already multiple implementations of go-bindings for Python, which however all rely on CGO and/or dynamic -linking. I experimented a lot with these and was not able to make it stable enough so that I could use it without fear -of the process crashing after some time. I even got to the point where I implemented my own dynamic library loader that -was not depending on CGO, but ultimately gave up when I realized that it would not work on all platforms. +## License -The only solution that was left was to spawn a Python process and use some kind of inter-process communication. For this -to work reliably, without any dependencies on the host system, it was required to embed a fully working Python -distribution into my Go binaries. I managed to make this flexible enough to put into a library so that others might -benefit as well. +Apache-2.0 — same as upstream. See [`LICENSE`](./LICENSE). -Initially, this approach/code was part of https://github.com/kluctl/kluctl to allow Jinja2 templates in Go. The Jinja2 -part can now be found in https://github.com/kluctl/go-jinja2. +Original authorship: kluctl contributors. Modernization fork: Duc-Tam Nguyen +<tamnd@liteio.dev>. diff --git a/embed_util/packer.go b/embed_util/packer.go index d5ded142..c83cc892 100644 --- a/embed_util/packer.go +++ b/embed_util/packer.go @@ -8,11 +8,12 @@ import ( "encoding/hex" "encoding/json" "fmt" - log "github.com/sirupsen/logrus" - "golang.org/x/sync/errgroup" + "log/slog" "os" "path/filepath" "strings" + + "golang.org/x/sync/errgroup" ) func CopyForEmbed(out string, dir string) error { @@ -21,7 +22,7 @@ func CopyForEmbed(out string, dir string) error { return err } - log.Infof("copying to %s with %d files", out, len(fl.Files)) + slog.Info("copying for embed", "out", out, "files", len(fl.Files)) err = copyFiles(out, dir, fl) if err != nil { return err diff --git a/example/main.go b/example/main.go index de967346..25bea1e2 100644 --- a/example/main.go +++ b/example/main.go @@ -1,7 +1,7 @@ package main import ( - "github.com/kluctl/go-embed-python/python" + "github.com/tamnd/goempy/python" "os" ) diff --git a/go.mod b/go.mod index 14424475..41582044 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,11 @@ -module github.com/kluctl/go-embed-python +module github.com/tamnd/goempy -go 1.19 +go 1.24 require ( github.com/gobwas/glob v0.2.3 github.com/gofrs/flock v0.12.1 github.com/klauspost/compress v1.17.11 - github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.10.0 golang.org/x/sync v0.10.0 ) diff --git a/go.sum b/go.sum index d955dbe0..7743df6b 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,4 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= @@ -16,20 +15,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pip/embed_pip_packages.go b/pip/embed_pip_packages.go index b38814c4..c37b299e 100644 --- a/pip/embed_pip_packages.go +++ b/pip/embed_pip_packages.go @@ -2,9 +2,9 @@ package pip import ( "fmt" - "github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/internal" - "github.com/kluctl/go-embed-python/python" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/internal" + "github.com/tamnd/goempy/python" "math/rand" "os" "path/filepath" diff --git a/pip/generate/main.go b/pip/generate/main.go index e003fe34..800cdeb7 100644 --- a/pip/generate/main.go +++ b/pip/generate/main.go @@ -2,8 +2,8 @@ package main import ( "fmt" - "github.com/kluctl/go-embed-python/pip" - "github.com/kluctl/go-embed-python/python" + "github.com/tamnd/goempy/pip" + "github.com/tamnd/goempy/python" "io" "net/http" "os" @@ -45,8 +45,14 @@ func bootstrapPip(ep *python.EmbeddedPython) { } } +// getPipURL bootstraps pip inside the freshly-extracted interpreter. The +// unversioned bootstrap.pypa.io/get-pip.py URL silently switches away from +// versions that still support older CPython releases, so we pin to the Python +// 3.14-era bootstrap that ships pip 25.x. +const getPipURL = "https://bootstrap.pypa.io/pip/get-pip.py" + func downloadGetPip() string { - resp, err := http.Get("https://bootstrap.pypa.io/get-pip.py") + resp, err := http.Get(getPipURL) if err != nil { panic(err) } diff --git a/pip/internal/requirements.txt b/pip/internal/requirements.txt index 662f25f2..eba63c0c 100644 --- a/pip/internal/requirements.txt +++ b/pip/internal/requirements.txt @@ -1 +1,3 @@ -pip==24.3.1 +pip==25.2 +setuptools>=75.0 +wheel>=0.45.0 diff --git a/pip/pip_lib.go b/pip/pip_lib.go index c89e4e68..a3f2f553 100644 --- a/pip/pip_lib.go +++ b/pip/pip_lib.go @@ -2,8 +2,8 @@ package pip import ( "fmt" - "github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/pip/internal/data" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/pip/internal/data" ) func NewPipLib(name string) (*embed_util.EmbeddedFiles, error) { diff --git a/python/embedded_python.go b/python/embedded_python.go index 0f0e06e1..b2c7d564 100644 --- a/python/embedded_python.go +++ b/python/embedded_python.go @@ -2,8 +2,8 @@ package python import ( "fmt" - "github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/python/internal/data" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/python/internal/data" ) type EmbeddedPython struct { diff --git a/python/embedded_python_test.go b/python/embedded_python_test.go index 74ed28e5..ceab1b49 100644 --- a/python/embedded_python_test.go +++ b/python/embedded_python_test.go @@ -3,7 +3,7 @@ package python import ( "bytes" "fmt" - "github.com/kluctl/go-embed-python/internal" + "github.com/tamnd/goempy/internal" "github.com/stretchr/testify/assert" "io" "math/rand" diff --git a/python/generate/main.go b/python/generate/main.go index f980fec8..b5fe9e09 100644 --- a/python/generate/main.go +++ b/python/generate/main.go @@ -3,17 +3,18 @@ package main import ( "flag" "fmt" - "github.com/gobwas/glob" - "github.com/klauspost/compress/zstd" - "github.com/kluctl/go-embed-python/embed_util" - "github.com/kluctl/go-embed-python/internal" - log "github.com/sirupsen/logrus" "io" + "log/slog" "net/http" "os" "path/filepath" "strings" "sync" + + "github.com/gobwas/glob" + "github.com/klauspost/compress/zstd" + "github.com/tamnd/goempy/embed_util" + "github.com/tamnd/goempy/internal" ) var ( @@ -22,6 +23,7 @@ var ( preparePath = flag.String("prepare-path", filepath.Join(os.TempDir(), "python-download"), "specify the path where the python executables are downloaded and prepared. automatically creates a temporary directory if unset") runPrepare = flag.Bool("prepare", true, "if set, python executables will be downloaded and prepared for packing at the configured path") runPack = flag.Bool("pack", true, "if set, previously prepared python executables will be packed into their redistributable form") + onlyPlatforms = flag.String("only-platforms", "", "comma-separated list of os/arch pairs to generate (e.g. 'linux/amd64,darwin/arm64'); default is all supported") pythonVersionBase string ) @@ -61,11 +63,13 @@ func main() { flag.Parse() if *pythonVersion == "" || *pythonStandaloneVersion == "" { - log.Fatal("missing flags") + slog.Error("missing flags") + os.Exit(1) } - log.Infof("python-standalone-version=%s", *pythonStandaloneVersion) - log.Infof("python-version=%s", *pythonVersion) + slog.Info("generating", + "python-standalone-version", *pythonStandaloneVersion, + "python-version", *pythonVersion) pythonVersionBase = strings.Join(strings.Split(*pythonVersion, ".")[0:2], ".") @@ -82,11 +86,30 @@ func main() { jobs := []job{ {"linux", "amd64", "unknown-linux-gnu-pgo+lto-full", keepNixPatterns}, - {"linux", "arm64", "unknown-linux-gnu-lto-full", keepNixPatterns}, + {"linux", "arm64", "unknown-linux-gnu-pgo+lto-full", keepNixPatterns}, {"darwin", "amd64", "apple-darwin-pgo+lto-full", keepNixPatterns}, {"darwin", "arm64", "apple-darwin-pgo+lto-full", keepNixPatterns}, - {"windows", "amd64", "pc-windows-msvc-shared-pgo-full", keepWinPatterns}, + {"windows", "amd64", "pc-windows-msvc-pgo-full", keepWinPatterns}, + } + + if *onlyPlatforms != "" { + want := map[string]bool{} + for _, p := range strings.Split(*onlyPlatforms, ",") { + want[strings.TrimSpace(p)] = true + } + filtered := jobs[:0] + for _, j := range jobs { + if want[j.os+"/"+j.arch] { + filtered = append(filtered, j) + } + } + jobs = filtered + if len(jobs) == 0 { + slog.Error("no platforms matched filter", "filter", *onlyPlatforms) + os.Exit(1) + } } + for _, j := range jobs { j := j wg.Add(1) @@ -109,7 +132,7 @@ func downloadAndPrepare(osName string, arch string, dist string, keepPatterns [] extractPath := downloadPath + ".extracted" err := os.RemoveAll(extractPath) if err != nil { - log.Panic(err) + panic(err) } extract(downloadPath, extractPath) @@ -160,7 +183,7 @@ func packPrepared(osName string, arch string, dist string, targetPath string) { func generateDownloadPath(arch string, dist string) string { pythonArch, ok := archMapping[arch] if !ok { - log.Errorf("arch %s not supported", arch) + slog.Error("arch not supported", "arch", arch) os.Exit(1) } fname := fmt.Sprintf("cpython-%s+%s-%s-%s.tar.zst", *pythonVersion, *pythonStandaloneVersion, pythonArch, dist) @@ -176,33 +199,37 @@ func download(osName string, arch string, dist string) string { downloadUrl := fmt.Sprintf("https://github.com/astral-sh/python-build-standalone/releases/download/%s/%s", *pythonStandaloneVersion, fname) if _, err := os.Stat(downloadPath); err == nil { - log.Infof("skipping download of %s", downloadUrl) + slog.Info("skipping download", "url", downloadUrl) return downloadPath } err := os.MkdirAll(filepath.Dir(downloadPath), 0o755) if err != nil { - log.Errorf("mkdirs failed: %v", err) + slog.Error("mkdirs failed", "err", err) os.Exit(1) } - log.Infof("downloading %s", downloadUrl) + slog.Info("downloading", "url", downloadUrl) r, err := http.Get(downloadUrl) if err != nil { - log.Errorf("download failed: %v", err) + slog.Error("download failed", "err", err) os.Exit(1) } if r.StatusCode == http.StatusNotFound { - log.Errorf("404 not found") + slog.Error("404 not found", "url", downloadUrl) os.Exit(1) } defer r.Body.Close() fileData, err := io.ReadAll(r.Body) + if err != nil { + slog.Error("reading response failed", "err", err) + os.Exit(1) + } err = os.WriteFile(downloadPath, fileData, 0o640) if err != nil { - log.Errorf("writing file failed: %v", err) + slog.Error("writing file failed", "err", err) os.Remove(downloadPath) os.Exit(1) } @@ -213,22 +240,22 @@ func download(osName string, arch string, dist string) string { func extract(archivePath string, targetPath string) string { f, err := os.Open(archivePath) if err != nil { - log.Errorf("opening file failed: %v", err) + slog.Error("opening file failed", "err", err) os.Exit(1) } defer f.Close() z, err := zstd.NewReader(f) if err != nil { - log.Errorf("decompression failed: %v", err) + slog.Error("decompression failed", "err", err) os.Exit(1) } defer z.Close() - log.Infof("decompressing %s", archivePath) + slog.Info("decompressing", "path", archivePath) err = internal.ExtractTarStream(z, targetPath) if err != nil { - log.Errorf("decompression failed: %v", err) + slog.Error("decompression failed", "err", err) os.Exit(1) } diff --git a/python/internal/data/dummy.go b/python/internal/data/dummy.go index 9566a9f1..12cd1fef 100644 --- a/python/internal/data/dummy.go +++ b/python/internal/data/dummy.go @@ -3,4 +3,4 @@ package data // PLEASE READ THIS!!!! // This file is really just a dummy. The release process will remove this file and generate some read embedded files // and commit these into a temporary branch and then tag it. This is to avoid clogging up the main branch with too many -// binary files, which would be a very bad experience when pulling in go-embed-python as a dependency. +// binary files, which would be a very bad experience when pulling in goempy as a dependency. diff --git a/python/python_test.go b/python/python_test.go index 35c39dce..6bf3c38a 100644 --- a/python/python_test.go +++ b/python/python_test.go @@ -2,7 +2,7 @@ package python import ( "bytes" - "github.com/kluctl/go-embed-python/internal" + "github.com/tamnd/goempy/internal" "github.com/stretchr/testify/assert" "io" "testing" From d6c0a814a7418acae9259b1a839076f22533ce68 Mon Sep 17 00:00:00 2001 From: Duc-Tam Nguyen <1218621+tamnd@users.noreply.github.com> Date: Mon, 20 Apr 2026 18:58:08 +0700 Subject: [PATCH 2/5] ci: Correct Python patch versions to match PBS 20260414 PBS 20260414 ships 3.10.20 / 3.11.15 / 3.12.13 / 3.13.13 / 3.14.4. My previous pins (3.10.17, 3.11.13, 3.12.11, 3.13.3) were pulled from an older release and 404 at download time. --- .github/workflows/release.yml | 8 ++++---- README.md | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f02ca842..5e417881 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,10 +15,10 @@ env: ] PYTHON_VERSIONS: | [ - "3.10.17", - "3.11.13", - "3.12.11", - "3.13.3", + "3.10.20", + "3.11.15", + "3.12.13", + "3.13.13", "3.14.4" ] diff --git a/README.md b/README.md index 0dd22188..97fb829b 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Planned: `windows/arm64`, `linux/musl`, free-threaded (PEP 703) builds. ## Supported Python versions -Primary: **3.14.4**. Also shipped: 3.13.3, 3.12.11, 3.11.13, 3.10.17. +Primary: **3.14.4**. Also shipped: 3.13.13, 3.12.13, 3.11.15, 3.10.20. ## Releases @@ -99,7 +99,7 @@ Edit `.github/workflows/release.yml`: ```yaml PYTHON_STANDALONE_VERSIONS: ["20260414"] -PYTHON_VERSIONS: ["3.10.17", "3.11.13", "3.12.11", "3.13.3", "3.14.4"] +PYTHON_VERSIONS: ["3.10.20", "3.11.15", "3.12.13", "3.13.13", "3.14.4"] ``` Open a PR — CI will build the full matrix and tag on merge. From 2be94be9d6e62d754ab8bfc2b0fa73591bd2520e Mon Sep 17 00:00:00 2001 From: Duc-Tam Nguyen <1218621+tamnd@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:04:17 +0700 Subject: [PATCH 3/5] docs: Rewrite README with architecture detail and upstream credits --- README.md | 429 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 362 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 97fb829b..dcc96e21 100644 --- a/README.md +++ b/README.md @@ -1,125 +1,420 @@ -# goempy — Embedded Python 3.14 for Go +# goempy -`goempy` ships a ready-to-run CPython interpreter inside your Go binary. No -CGo, no system Python, no external runtime — just `go get`, embed, and exec. +`goempy` bakes a working CPython 3.14 interpreter into your Go binary. Call +`python.NewEmbeddedPython("myapp")`, get back an `*exec.Cmd` factory, and run +Python code without any Python on the host — no `apt install python3`, no +PyInstaller tricks, no CGo. -It is a modernized fork of [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python) -tracking Python 3.14 and the Astral [`python-build-standalone`](https://github.com/astral-sh/python-build-standalone) -releases. - -## Quick start +It is a fork of [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python) +that I started in April 2026 because upstream had gone quiet and Python 3.14 +had just shipped. All of the hard design work — the per-file gzip layout, the +flock-guarded extraction, the pip integration — is the original authors'. +Everything in this tree is under Apache-2.0 and attributed accordingly (see +[Credits](#credits)). ```go +package main + import ( "os" + "github.com/tamnd/goempy/python" ) func main() { - ep, err := python.NewEmbeddedPython("example") + ep, err := python.NewEmbeddedPython("hello") if err != nil { panic(err) } defer ep.Cleanup() - cmd, err := ep.PythonCmd("-c", "print('hello from embedded python')") - if err != nil { - panic(err) - } + cmd, _ := ep.PythonCmd("-c", "import sys; print(sys.version)") cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr _ = cmd.Run() } ``` +Build that program with `go get github.com/tamnd/goempy@v0.0.0-3.14.4-20260414-1` +and run it. The first invocation extracts about 22 MB of Python into +`$TMPDIR/go-embedded-python-hello-/` and then executes +`bin/python3 -c …`. Subsequent invocations reuse the same directory — the +integrity check looks at file size and skips the copy if nothing has changed. + +## Why this exists + +If you want to run Python from Go you have a few options, and most of them +hurt: + +- **CGo + libpython** via + [`go-python/cpy3`](https://github.com/go-python/cpy3) or similar. + Requires the right libpython on the host at runtime. Cross-compiling is a + nightmare. Stability is fragile under load. +- **A sidecar process** you ship alongside your binary. You own the + installation story for every platform. +- **[PyOxidizer](https://github.com/indygreg/PyOxidizer) / + [pyembed](https://github.com/indygreg/PyOxidizer/tree/main/pyembed)**. + Rust-centric, in-process, heavyweight, and the project is effectively + abandoned. + +`kluctl/go-embed-python` took a different route, which this fork inherits: +**embed the entire stdlib + interpreter into the Go binary**, extract on +first run, and call it as a subprocess. No CGo. No host dependencies. Cross +compilation is just `GOOS=linux GOARCH=arm64 go build`. + +The size cost is real — a single-platform binary gains roughly 25–30 MB of +compressed Python — but for CLI tools, operators, and GitOps controllers +that want to embed templating engines or pure-Python libraries, it is the +cleanest option I have found. + +## Architecture + +``` +┌───────────────────────── release-time (CI) ──────────────────────────┐ +│ │ +│ python/generate ─┬─► download PBS tarball (─> tar.zst) │ +│ (one per platform)│ │ +│ ├─► zstd → tar → install/ tree │ +│ │ │ +│ ├─► strip stdlib: test, idlelib, lib2to3, ... │ +│ │ │ +│ └─► embed_util.CopyForEmbed │ +│ │ │ +│ ▼ │ +│ python/internal/data/-/ │ +│ ├── bin/python3.gz (per-file gzip -9) │ +│ ├── lib/python3.14/**/*.gz │ +│ ├── files.json (manifest + content hash) │ +│ └── symlinks preserved via manifest │ +│ │ +│ pip/generate ──► pip install -r requirements.txt --platform … │ +│ into python/internal/data/pip/ (same layout) │ +│ │ +└──────────────────────────────────────────────────────────────────────┘ + │ │ + └──────────── git tag ──────────────────┘ + v0.0.0--- + +┌───────────────────────── build-time (user's app) ───────────────────┐ +│ │ +│ //go:embed all:linux-amd64 (build constraint per file) │ +│ var _data embed.FS │ +│ var Data, _ = fs.Sub(_data, "linux-amd64") │ +│ │ +│ ► go link only embeds the bytes for GOOS/GOARCH of the build. │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ + +┌───────────────────────── runtime (user's app) ──────────────────────┐ +│ │ +│ python.NewEmbeddedPython(name) │ +│ └─► embed_util.NewEmbeddedFiles(data.Data, "python-"+name) │ +│ ├── read files.json from embed.FS │ +│ ├── compute SHA-256 hash of manifest │ +│ ├── extractedPath := $TMPDIR/go-embedded-- │ +│ ├── flock(extractedPath + ".lock") -- crash-safe │ +│ ├── for each entry in manifest: │ +│ │ • if file exists and Size matches → skip │ +│ │ • else gunzip from embed.FS → write to disk │ +│ │ • replay symlinks from manifest │ +│ └── return EmbeddedFiles{extractedPath} │ +│ │ +│ └─► NewPython(WithPythonHome(extractedPath)) │ +│ returns an *exec.Cmd factory that sets PYTHONHOME and │ +│ PYTHONPATH for you before exec. │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### The pieces + +`internal/tar.go` + : Streaming zstd → tar extractor used at release time. Handles regular + files, directories, symlinks. Hardlinks currently raise an error (see + [#7](#roadmap)). + +`internal/cleanup_python.go` + : Applies a glob-based keep-list to the extracted PBS install tree, + removing test suites, documentation, IDLE, tkinter demos, and other + stdlib weight we do not want to ship. + +`embed_util/file_list.go` + : Defines `fileList` / `fileListEntry`. Each entry records mode, size, + compression flag, and symlink target. The full list is serialized to + `files.json` and hashed to form the extraction directory suffix. + +`embed_util/packer.go` + : Walks the cleaned install tree, gzip-compresses each regular file at + `BestCompression`, and writes `*.gz` alongside a manifest. Also + generates the per-platform `embed__.go` stub that the Go + compiler consumes via `//go:embed all:-`. + +`embed_util/embedded_files.go` + : The runtime extractor. Takes any `fs.FS` (so you can also embed your + own pip packages), resolves symlinks through the manifest, and writes + to a per-hash directory under `$TMPDIR`. A `gofrs/flock` lock serializes + concurrent extractions so that multiple processes in the same host + cannot race. + +`python/embedded_python.go` + : The user-facing `EmbeddedPython` type. Couples an `EmbeddedFiles` + (extraction) with a `Python` (exec.Cmd factory). `PYTHONHOME` is wired + up automatically; call `AddPythonPath(dir)` to splice additional + directories — normally the extracted path of a pip-packed `embed.FS`. + +`pip/*` + : Build-time helpers that shell out to the embedded pip (25.2 at the + time of writing) with `--platform` filters to fetch cross-platform + wheels into `./data/-/`. You wire this into your own + project with a `//go:generate go run ./generate` stub. + +### Why per-file gzip instead of a single tarball + +The obvious alternative is a single `.tar.zst` blob extracted on first +run. The original authors chose per-file gzip and that choice still pays +off: + +1. `embed.FS` lookups are path-based. Per-file entries let runtime check + whether a file already exists on disk at the right size and skip it + — the "unchanged" fast path turns second-run extraction into a few + hundred `stat` calls. +2. Partial extracts fail gracefully. If the process is killed in the + middle of extraction, the next run resumes per-file without having + to redecompress a 100 MB archive. +3. gzip is in the Go standard library. zstd would shave 15–30 % off the + compressed size — there is an open thought in [`#roadmap`](#roadmap) + to swap it — but it is not free; the packer already depends on + `klauspost/compress` for the release-time tarball, so the trade-off + is mostly about runtime decompressor choice. + +### python-build-standalone + +The Python distributions themselves come from +[`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone) +(PBS), which ships fully relocatable, statically-linked, PGO+LTO CPython +builds with pinned OpenSSL, sqlite, libexpat, etc. That project does +almost all of the interesting work — CPython builds that are actually +portable across glibc versions are not easy. + +PBS was originally maintained by Gregory Szorc as part of the +[PyOxidizer](https://github.com/indygreg/PyOxidizer) effort under +`indygreg/python-build-standalone`. Astral (the `uv` / `ruff` folks) +took over the project in early 2024 and it is now the foundation for +`uv python install`, Astral's own Python installer. `goempy` rides on +top of the same releases. + +`python/generate/main.go` downloads the `*-pgo+lto-full.tar.zst` (or +`*-pgo-full.tar.zst` on Windows — PBS does not do LTO on MSVC builds) +for each (`goos`, `goarch`) in the matrix. A `--only-platforms` flag is +available for local development. + ## Supported platforms -| OS | Arch | PBS triple | -|---------|-------|-------------------------------------| -| linux | amd64 | `x86_64-unknown-linux-gnu` | -| linux | arm64 | `aarch64-unknown-linux-gnu` | -| darwin | amd64 | `x86_64-apple-darwin` | -| darwin | arm64 | `aarch64-apple-darwin` | -| windows | amd64 | `x86_64-pc-windows-msvc` (non-shared) | +| GOOS | GOARCH | PBS triple | LTO | Notes | +|---------|--------|-------------------------------------|-----|------------------------------------------| +| linux | amd64 | `x86_64-unknown-linux-gnu` | yes | glibc ≥ 2.17 (manylinux_2_17) | +| linux | arm64 | `aarch64-unknown-linux-gnu` | yes | upgraded to pgo+lto in PBS 20260414 | +| darwin | amd64 | `x86_64-apple-darwin` | yes | macOS 11+ (x86_64) | +| darwin | arm64 | `aarch64-apple-darwin` | yes | macOS 11+ (Apple Silicon) | +| windows | amd64 | `x86_64-pc-windows-msvc` | no | non-shared libpython; `.exe` entry point | -Planned: `windows/arm64`, `linux/musl`, free-threaded (PEP 703) builds. +Not yet wired up (PBS has the artifacts, just not in this fork's matrix): +`windows/arm64`, `linux/musl-{amd64,arm64}`, Linux micro-arch variants +(`x86_64_v{2,3,4}`), Android, Emscripten, and the free-threaded (PEP 703) +builds. See [Roadmap](#roadmap). ## Supported Python versions -Primary: **3.14.4**. Also shipped: 3.13.13, 3.12.13, 3.11.15, 3.10.20. +Each release tag is a `(python, pbs)` pair. As of the first `goempy` +release (April 2026), PBS 20260414 ships: + +| Python | Status | Notes | +|---------|----------|---------------------------------------------------------| +| 3.14.4 | primary | PEP 703 GIL-disable available via free-threaded builds | +| 3.13.13 | stable | | +| 3.12.13 | stable | | +| 3.11.15 | stable | | +| 3.10.20 | sunsetting | CPython upstream EOL October 2026 | -## Releases +All of them build from a single `release.yml` matrix. -Tag format: `v0.0.0---`, e.g. `v0.0.0-3.14.4-20260414-1`. -The `v0.0.0` prefix is intentional — this library does not follow semver. -The meaningful identifier is the `-` pair. Pin exactly. +## Release tags -> Dependabot and similar tools may mis-resolve upgrades against this scheme. -> Review Python version bumps manually. +Tag format: `v0.0.0---`. Example: -## How it works +``` +v0.0.0-3.14.4-20260414-1 + │ │ └─ build number (for re-releases of the same pair) + │ └────────── python-build-standalone release date tag + └───────────────── CPython version +``` -1. At release time, `python/generate` downloads each `(python, pbs, platform)` - triple from python-build-standalone, strips unused stdlib, and writes a - per-platform directory to `python/internal/data/-/`. -2. `embed_util.CopyForEmbed` compresses each file (gzip) and emits - `//go:embed` targets with build constraints so only the host's bytes are - linked into the final binary. -3. At runtime, `python.NewEmbeddedPython` extracts its `embed.FS` into - `$TMPDIR/go-embedded--`, guarded by `flock`, and returns an - `exec.Cmd` factory. +The leading `v0.0.0` is intentional — this library does not follow semver +and probably never will. The meaningful identifier is the +`-` suffix. `go get` against a specific tag to pin. Dependabot +and similar tools may mis-resolve upgrades against this scheme; review +Python version bumps manually. -## Embedding Python libraries +## Embedding pip packages -Create `internal/mylib/generate/main.go`: +The same machinery that packs CPython also packs pip-installed +dependencies into a per-platform `embed.FS`. Inside your repo: ```go +// internal/mylib/generate/main.go package main import "github.com/tamnd/goempy/pip" func main() { - err := pip.CreateEmbeddedPipPackagesForKnownPlatforms("requirements.txt", "./data/") - if err != nil { + if err := pip.CreateEmbeddedPipPackagesForKnownPlatforms( + "requirements.txt", + "./data/", + ); err != nil { panic(err) } } ``` -Add `//go:generate go run ./generate` and a `requirements.txt` next to it, then -`go generate ./...`. The generated `data.Data` `embed.FS` is passed to -`embed_util.NewEmbeddedFiles()` and wired into the interpreter via -`AddPythonPath`. - -A working example lives in [`example/`](./example). - -## Upgrading Python +``` +// internal/mylib/dummy.go +package mylib -Edit `.github/workflows/release.yml`: +//go:generate go run ./generate +``` -```yaml -PYTHON_STANDALONE_VERSIONS: ["20260414"] -PYTHON_VERSIONS: ["3.10.20", "3.11.15", "3.12.13", "3.13.13", "3.14.4"] +``` +# internal/mylib/requirements.txt +jinja2==3.1.4 ``` -Open a PR — CI will build the full matrix and tag on merge. +Then `go generate ./internal/mylib/...`. The generator downloads wheels +for every `(goos, goarch)` in the matrix using +`pip install --platform … --only-binary=:all:`, packs each target directory +the same way the interpreter is packed, and emits a `data.Data` `embed.FS` +per platform. At runtime: -## Why fork? +```go +libs, _ := embed_util.NewEmbeddedFiles(data.Data, "mylib-pip") +ep.AddPythonPath(libs.GetExtractedPath()) +cmd, _ := ep.PythonCmd("-c", "import jinja2; print(jinja2.__version__)") +``` -Upstream `kluctl/go-embed-python` has been largely dormant since early 2025. -The Python 3.14 upgrade PR has sat open since February. `goempy` picks up: +A working example is the [`example/`](./example) directory in this repo, +and a much more complete one lives in +[`kluctl/go-jinja2`](https://github.com/kluctl/go-jinja2) (which is where +this pattern was originally battle-tested). + +## Roadmap + +Things I want to do, in roughly decreasing priority: + +1. **Fix the manifest integrity check.** Current "unchanged" fast path + compares only `Size()`. Across a 3.13 → 3.14 upgrade, same-named + stdlib files can have identical sizes and stale bytes on disk survive + the check. Use the per-entry content hash that already exists in + `files.json`. +2. **Windows path normalization.** When the packer runs on Windows, + `filepath.Separator` leaks into `files.json` entries as `\`. Force + `/` everywhere in the manifest. (Carries intent of upstream PR #50.) +3. **Per-file zstd** instead of gzip. Pure-Go decoder via + `klauspost/compress/zstd`; saves 15–30 % of embedded bytes. +4. **`windows/arm64`, `linux/musl-{amd64,arm64}`** in the matrix. +5. **Free-threaded (PEP 703) variants** as an opt-in matrix axis. PBS + already ships `*-freethreaded+pgo-full.tar.zst` everywhere that + matters. +6. **Lazy extract**: a single compressed blob + streaming extract on + first `PythonCmd` call. Optional mode — the per-file layout stays + default for the fast-skip path. +7. **Android / iOS triples**. PBS 20260414 has Android, and 3.14 makes + it tier-3. + +Contributions welcome. + +## Non-goals + +- **In-process Python via CGo.** Explicitly out of scope. If you want + that, use `go-python/cpy3` or PyOxidizer and make peace with libpython. +- **Python → Go bindings.** Use + [`go-python/gopy`](https://github.com/go-python/gopy) for that. +- **A general-purpose `embed.FS` compressor.** The `embed_util` package + is not trying to be one, even though it could look like one. It is + tuned for (large, many-file, partially-symlinked Python trees). + +## Credits + +### Upstream authors + +All of the design and the vast majority of the code in this repository +come from the original +[`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python), +authored by Alexander Block ([@codablock](https://github.com/codablock)) +and contributors in the Kluctl organization. The project was extracted +from [`kluctl/kluctl`](https://github.com/kluctl/kluctl), a Kubernetes +GitOps tool, where it was used to ship Jinja2 templating without +depending on a system Python. If this library helps you, star the +[upstream repo](https://github.com/kluctl/go-embed-python) first. + +This fork adds: - Python 3.14.4 + python-build-standalone `20260414` -- Windows dist name fix (PBS dropped the `shared-` infix) -- Go 1.24 toolchain, `log/slog` (drop `logrus`) -- pip 25.2, pinned `get-pip.py` - -See [spec 0967](../../notes/Spec/0900/0967_go_embed_python.md) for the upgrade -rationale and roadmap. +- Fix for the Windows PBS triple rename (`pc-windows-msvc-shared-pgo-full` + → `pc-windows-msvc-pgo-full`) that silently broke the 3.14 download path +- Upgrade of `linux/arm64` to PGO+LTO (PBS now ships it) +- Go toolchain 1.19 → 1.24, `log/slog` in place of `sirupsen/logrus` +- pip 24.3.1 → 25.2, with explicit `setuptools>=75` / `wheel>=0.45` pins +- `--only-platforms` on the generator for scoped local builds +- Docs rewritten + +Individual file headers retain their original attribution and Apache-2.0 +licensing. Nothing in this fork is relicensed. + +### python-build-standalone + +The Python distributions themselves are +[`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone), +originally +[`indygreg/python-build-standalone`](https://github.com/indygreg/python-build-standalone) +by Gregory Szorc. Astral took maintenance over in 2024 and it is now one +of the two or three most important projects in the Python packaging +ecosystem. Everything in this fork rests on their work. + +## Related projects + +Things you might want instead, or alongside: + +- **[kluctl/go-embed-python](https://github.com/kluctl/go-embed-python)** + — upstream. Use this if you are on Python ≤ 3.13 and the last upstream + release meets your needs. +- **[kluctl/go-jinja2](https://github.com/kluctl/go-jinja2)** — Jinja2 + templating for Go, implemented by driving an embedded Python + subprocess. Best real-world consumer of `go-embed-python` and a useful + reference for how to wire pip-embedded libraries into it. +- **[kluctl/kluctl](https://github.com/kluctl/kluctl)** — the GitOps + tool where this code originated. +- **[astral-sh/python-build-standalone](https://github.com/astral-sh/python-build-standalone)** + — the portable CPython builds we redistribute. Read their release + notes before upgrading. +- **[astral-sh/uv](https://github.com/astral-sh/uv)** — a Rust-based + Python package / project manager that also consumes PBS. Conceptual + sibling: if you are a Rust shop, `uv` does for Rust binaries what + `goempy` does for Go binaries (minus the CGo-free subprocess model). +- **[indygreg/PyOxidizer](https://github.com/indygreg/PyOxidizer)** and + **[pyembed](https://github.com/indygreg/PyOxidizer/tree/main/pyembed)** + — the other approach: in-process CPython linked into a single binary. + Largely unmaintained at this point. +- **[go-python/gopy](https://github.com/go-python/gopy)** — generate Go + bindings for Python packages. Different problem; they complement each + other. +- **[go-python/cpy3](https://github.com/go-python/cpy3)** — direct CGo + bindings to libpython. What you use if you really want to call Python + in-process and do not care about deployment pain. +- **[cibuildwheel](https://github.com/pypa/cibuildwheel)** — worth a + read for the cross-platform CI matrix patterns we imitate. ## License -Apache-2.0 — same as upstream. See [`LICENSE`](./LICENSE). +Apache-2.0, the same as upstream. See [`LICENSE`](./LICENSE). -Original authorship: kluctl contributors. Modernization fork: Duc-Tam Nguyen -<tamnd@liteio.dev>. +Original copyright: Kluctl contributors. Fork maintenance: Duc-Tam +Nguyen <tamnd@liteio.dev>. From e0a4f08cce7cbf6a45b22817f5d3bf1955eeeef9 Mon Sep 17 00:00:00 2001 From: Duc-Tam Nguyen <1218621+tamnd@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:23:46 +0700 Subject: [PATCH 4/5] docs: Rewrite README in plainer voice, drop em dashes --- README.md | 645 +++++++++++++++++++++++++++++------------------------- 1 file changed, 348 insertions(+), 297 deletions(-) diff --git a/README.md b/README.md index dcc96e21..366364c4 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,15 @@ # goempy -`goempy` bakes a working CPython 3.14 interpreter into your Go binary. Call -`python.NewEmbeddedPython("myapp")`, get back an `*exec.Cmd` factory, and run -Python code without any Python on the host — no `apt install python3`, no -PyInstaller tricks, no CGo. - -It is a fork of [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python) -that I started in April 2026 because upstream had gone quiet and Python 3.14 -had just shipped. All of the hard design work — the per-file gzip layout, the -flock-guarded extraction, the pip integration — is the original authors'. -Everything in this tree is under Apache-2.0 and attributed accordingly (see -[Credits](#credits)). +`goempy` bakes a CPython 3.14 interpreter into your Go binary. You call +`python.NewEmbeddedPython("myapp")`, you get back an `*exec.Cmd` factory, +and you run Python code on hosts that have no Python installed. There is no +CGo, no libpython to hunt for, no PyInstaller trick. + +This is a fork of [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python). +I started the fork in April 2026 after Python 3.14 shipped and upstream had +been quiet for a few months. The design, the build pipeline, the pip +integration, the runtime extractor: all of that is the original authors' +work. See [Credits](#credits) for the full attribution. ```go package main @@ -35,230 +34,270 @@ func main() { } ``` -Build that program with `go get github.com/tamnd/goempy@v0.0.0-3.14.4-20260414-1` -and run it. The first invocation extracts about 22 MB of Python into -`$TMPDIR/go-embedded-python-hello-/` and then executes -`bin/python3 -c …`. Subsequent invocations reuse the same directory — the -integrity check looks at file size and skips the copy if nothing has changed. - -## Why this exists - -If you want to run Python from Go you have a few options, and most of them -hurt: - -- **CGo + libpython** via - [`go-python/cpy3`](https://github.com/go-python/cpy3) or similar. - Requires the right libpython on the host at runtime. Cross-compiling is a - nightmare. Stability is fragile under load. -- **A sidecar process** you ship alongside your binary. You own the - installation story for every platform. -- **[PyOxidizer](https://github.com/indygreg/PyOxidizer) / - [pyembed](https://github.com/indygreg/PyOxidizer/tree/main/pyembed)**. - Rust-centric, in-process, heavyweight, and the project is effectively - abandoned. - -`kluctl/go-embed-python` took a different route, which this fork inherits: -**embed the entire stdlib + interpreter into the Go binary**, extract on -first run, and call it as a subprocess. No CGo. No host dependencies. Cross -compilation is just `GOOS=linux GOARCH=arm64 go build`. - -The size cost is real — a single-platform binary gains roughly 25–30 MB of -compressed Python — but for CLI tools, operators, and GitOps controllers -that want to embed templating engines or pure-Python libraries, it is the -cleanest option I have found. +Build that and run it. On the first invocation you get roughly 22 MB of +Python extracted into `$TMPDIR/go-embedded-python-hello-/`. The +binary then execs `bin/python3 -c …` as a subprocess. Subsequent +invocations reuse the same directory. The integrity check compares each +file's size on disk against the embedded manifest and skips writing when +the two agree. + +## Why bother + +There are only a handful of ways to run Python from Go, and most of them +are annoying. + +The CGo route with [`go-python/cpy3`](https://github.com/go-python/cpy3) +and friends works, but you own the problem of getting the correct +libpython onto every machine you ship to. Cross-compilation is painful. +Long-running processes tend to pick up edge cases around the GIL and +reference counting. + +A sidecar Python install works, but then you are shipping an installer +for each platform, and you cannot cleanly vendor your Python dependencies +with `go get`. + +[PyOxidizer](https://github.com/indygreg/PyOxidizer) and +[pyembed](https://github.com/indygreg/PyOxidizer/tree/main/pyembed) embed +CPython in-process, Rust-side, and the project is effectively parked. + +The approach in `kluctl/go-embed-python`, and now this fork, is to put +the entire CPython tree into the Go binary via `//go:embed`, extract it +on first run, and exec it as a subprocess. That trades about 25 to 30 MB +of compressed binary size for a deployment story that is just `go build`. +For CLI tools, Kubernetes operators, and GitOps controllers that want +templating or a pure-Python library or two, I have not found a cleaner +option. ## Architecture +There are three times that matter: release time (when CI builds the +tagged artifact), build time (when a user compiles their app against a +tag), and runtime (when that app runs). + +### Release time + +`python/generate/main.go` runs once per `(python, pbs, platform)` tuple. +For each tuple it does the following. + +It downloads the matching +`cpython-+--pgo+lto-full.tar.zst` asset from +[python-build-standalone](https://github.com/astral-sh/python-build-standalone). +Windows uses `pgo-full` (no LTO on MSVC builds). + +It decompresses with `klauspost/compress/zstd`, streams through +`archive/tar`, and writes the result into a staging directory. + +It removes parts of the stdlib we do not want to carry. Right now the +removal list is `ensurepip`, `idlelib`, `lib2to3`, `pydoc_data`, +`site-packages`, `test`, `turtledemo`, and the stray `bin` directory +that some packages install into. + +It runs `internal.CleanupPythonDir` to apply a keep-glob pattern. On +Unix the keep list is `bin/**`, `lib/*.so*`, `lib/*.dylib`, and +`lib/python3.*/**`. On Windows it is `Lib/**`, `DLLs/**`, `*.dll`, and +`*.exe`. Everything else in the install tree gets deleted. + +It calls `embed_util.CopyForEmbed`, which walks the cleaned tree, +gzip-compresses every regular file at `BestCompression`, preserves +symlinks in a manifest, and writes the result to +`python/internal/data/-/`. The manifest lands in +`files.json` alongside the compressed payload and is content-hashed. + +Finally it emits a small `embed__.go` stub with a build +constraint and a `//go:embed all:-` directive so the Go +compiler picks up only the right platform's bytes. + +`pip/generate` does roughly the same thing for pip-installed wheels. +It drives the freshly-extracted interpreter through `pip install -r +requirements.txt --platform --only-binary=:all:`, then packs +each target directory using the same `embed_util.CopyForEmbed` helper. + +The release workflow finishes by committing +`python/internal/data/` and `pip/internal/data/` to a detached branch +and tagging it as `v0.0.0---`. The main branch stays +slim because the binary data never lives on it. + +### Build time + +In a user's application you write `import "github.com/tamnd/goempy/python"` +and pin a specific release tag. The Go compiler resolves that tag, +pulls in the tree with the committed `python/internal/data/`, and +evaluates the per-platform `embed.go` files. Each file looks like this: + +```go +//go:build linux && amd64 + +package data + +import ( + "embed" + "io/fs" +) + +//go:embed all:linux-amd64 +var _data embed.FS +var Data, _ = fs.Sub(_data, "linux-amd64") ``` -┌───────────────────────── release-time (CI) ──────────────────────────┐ -│ │ -│ python/generate ─┬─► download PBS tarball (─> tar.zst) │ -│ (one per platform)│ │ -│ ├─► zstd → tar → install/ tree │ -│ │ │ -│ ├─► strip stdlib: test, idlelib, lib2to3, ... │ -│ │ │ -│ └─► embed_util.CopyForEmbed │ -│ │ │ -│ ▼ │ -│ python/internal/data/-/ │ -│ ├── bin/python3.gz (per-file gzip -9) │ -│ ├── lib/python3.14/**/*.gz │ -│ ├── files.json (manifest + content hash) │ -│ └── symlinks preserved via manifest │ -│ │ -│ pip/generate ──► pip install -r requirements.txt --platform … │ -│ into python/internal/data/pip/ (same layout) │ -│ │ -└──────────────────────────────────────────────────────────────────────┘ - │ │ - └──────────── git tag ──────────────────┘ - v0.0.0--- - -┌───────────────────────── build-time (user's app) ───────────────────┐ -│ │ -│ //go:embed all:linux-amd64 (build constraint per file) │ -│ var _data embed.FS │ -│ var Data, _ = fs.Sub(_data, "linux-amd64") │ -│ │ -│ ► go link only embeds the bytes for GOOS/GOARCH of the build. │ -│ │ -└─────────────────────────────────────────────────────────────────────┘ - -┌───────────────────────── runtime (user's app) ──────────────────────┐ -│ │ -│ python.NewEmbeddedPython(name) │ -│ └─► embed_util.NewEmbeddedFiles(data.Data, "python-"+name) │ -│ ├── read files.json from embed.FS │ -│ ├── compute SHA-256 hash of manifest │ -│ ├── extractedPath := $TMPDIR/go-embedded-- │ -│ ├── flock(extractedPath + ".lock") -- crash-safe │ -│ ├── for each entry in manifest: │ -│ │ • if file exists and Size matches → skip │ -│ │ • else gunzip from embed.FS → write to disk │ -│ │ • replay symlinks from manifest │ -│ └── return EmbeddedFiles{extractedPath} │ -│ │ -│ └─► NewPython(WithPythonHome(extractedPath)) │ -│ returns an *exec.Cmd factory that sets PYTHONHOME and │ -│ PYTHONPATH for you before exec. │ -│ │ -└─────────────────────────────────────────────────────────────────────┘ + +The build constraints mean the linker only embeds the bytes for the +`GOOS`/`GOARCH` combo you are compiling for. A `GOOS=linux GOARCH=arm64` +build pulls in one `linux-arm64` tree and nothing else. + +### Runtime + +`python.NewEmbeddedPython(name)` is the only entry point most users +need. Under the hood: + ``` +NewEmbeddedPython(name) + └─ embed_util.NewEmbeddedFiles(data.Data, "python-"+name) + ├─ read files.json from embed.FS + ├─ compute SHA-256 of the manifest + ├─ extractedPath := $TMPDIR/go-embedded-- + ├─ flock(extractedPath + ".lock") // serialize peers + ├─ walk manifest: + │ • if target exists and Size matches → skip + │ • else gunzip from embed.FS → write to disk + │ • replay symlinks through the manifest + └─ return EmbeddedFiles{extractedPath} + └─ NewPython(WithPythonHome(extractedPath)) +``` + +The hash suffix in the directory name lets two differently-versioned +binaries on the same machine coexist without stepping on each other. +The flock lock means two copies of the same binary starting at the same +time will not race to extract into the same directory. ### The pieces -`internal/tar.go` - : Streaming zstd → tar extractor used at release time. Handles regular - files, directories, symlinks. Hardlinks currently raise an error (see - [#7](#roadmap)). - -`internal/cleanup_python.go` - : Applies a glob-based keep-list to the extracted PBS install tree, - removing test suites, documentation, IDLE, tkinter demos, and other - stdlib weight we do not want to ship. - -`embed_util/file_list.go` - : Defines `fileList` / `fileListEntry`. Each entry records mode, size, - compression flag, and symlink target. The full list is serialized to - `files.json` and hashed to form the extraction directory suffix. - -`embed_util/packer.go` - : Walks the cleaned install tree, gzip-compresses each regular file at - `BestCompression`, and writes `*.gz` alongside a manifest. Also - generates the per-platform `embed__.go` stub that the Go - compiler consumes via `//go:embed all:-`. - -`embed_util/embedded_files.go` - : The runtime extractor. Takes any `fs.FS` (so you can also embed your - own pip packages), resolves symlinks through the manifest, and writes - to a per-hash directory under `$TMPDIR`. A `gofrs/flock` lock serializes - concurrent extractions so that multiple processes in the same host - cannot race. - -`python/embedded_python.go` - : The user-facing `EmbeddedPython` type. Couples an `EmbeddedFiles` - (extraction) with a `Python` (exec.Cmd factory). `PYTHONHOME` is wired - up automatically; call `AddPythonPath(dir)` to splice additional - directories — normally the extracted path of a pip-packed `embed.FS`. - -`pip/*` - : Build-time helpers that shell out to the embedded pip (25.2 at the - time of writing) with `--platform` filters to fetch cross-platform - wheels into `./data/-/`. You wire this into your own - project with a `//go:generate go run ./generate` stub. - -### Why per-file gzip instead of a single tarball - -The obvious alternative is a single `.tar.zst` blob extracted on first -run. The original authors chose per-file gzip and that choice still pays -off: - -1. `embed.FS` lookups are path-based. Per-file entries let runtime check - whether a file already exists on disk at the right size and skip it - — the "unchanged" fast path turns second-run extraction into a few - hundred `stat` calls. -2. Partial extracts fail gracefully. If the process is killed in the - middle of extraction, the next run resumes per-file without having - to redecompress a 100 MB archive. -3. gzip is in the Go standard library. zstd would shave 15–30 % off the - compressed size — there is an open thought in [`#roadmap`](#roadmap) - to swap it — but it is not free; the packer already depends on - `klauspost/compress` for the release-time tarball, so the trade-off - is mostly about runtime decompressor choice. +`internal/tar.go` is a streaming zstd→tar extractor used at release +time. It handles regular files, directories, and symlinks. Hardlinks +currently error out, which is fine for CPython but worth knowing if +you fork this for a different payload. + +`internal/cleanup_python.go` holds the glob-driven keep-list that +trims the stdlib. This is where you tweak things if you want to ship +or drop a particular module. + +`embed_util/file_list.go` defines `fileList` and `fileListEntry`. +Each entry records name, mode, size, compression flag, and symlink +target. The full list is what gets serialized into `files.json`. + +`embed_util/packer.go` does the release-time packing. It walks the +install tree and compresses each regular file individually using +`compress/gzip` at `BestCompression`. It also writes the +`embed__.go` stub. + +`embed_util/embedded_files.go` is the runtime extractor. It takes any +`fs.FS`, so the same code extracts both the interpreter and your own +pip-packed packages. Symlinks resolve through the manifest rather than +through the host filesystem, which matters because `embed.FS` cannot +represent symlinks natively. + +`python/embedded_python.go` ties `EmbeddedFiles` to a `Python` +interface. The `Python` interface is a thin wrapper around +`exec.Command` that sets `PYTHONHOME` and `PYTHONPATH` for you. +`AddPythonPath(dir)` splices an additional directory in, which is how +you wire pip-packed packages into the interpreter. + +`pip/pip_lib.go` and `pip/embed_pip_packages.go` are build-time helpers +that package pip itself (already embedded in `pip/internal/data/`) and +run it against a user's `requirements.txt`. Platform selection uses +pip's `--platform` flag with hardcoded tags that map to the supported +`(goos, goarch)` matrix. + +### Per-file gzip vs one big tarball + +The obvious alternative to per-file gzip is a single `.tar.zst` blob +that gets extracted on first run. The original authors picked per-file +gzip and I agree with the choice. + +First, `embed.FS` lookups are path-based. Per-file entries let the +extractor `stat` each target, compare sizes, and skip writes when the +disk is already in sync with the manifest. The second-run fast path +is a few hundred syscalls and no decompression. + +Second, partial extracts degrade gracefully. Kill the process halfway +through first-run extraction and the next run resumes one file at a +time instead of having to redecompress a 100 MB archive. + +Third, gzip is in the standard library. zstd would shave 15 to 30 +percent off the compressed payload and the packer already pulls in +`klauspost/compress` for the release-time tarball, so switching is +not much work. It is on the [Roadmap](#roadmap); it just hasn't +bubbled to the top. ### python-build-standalone -The Python distributions themselves come from +The actual Python distributions come from [`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone) -(PBS), which ships fully relocatable, statically-linked, PGO+LTO CPython -builds with pinned OpenSSL, sqlite, libexpat, etc. That project does -almost all of the interesting work — CPython builds that are actually -portable across glibc versions are not easy. - -PBS was originally maintained by Gregory Szorc as part of the -[PyOxidizer](https://github.com/indygreg/PyOxidizer) effort under -`indygreg/python-build-standalone`. Astral (the `uv` / `ruff` folks) -took over the project in early 2024 and it is now the foundation for -`uv python install`, Astral's own Python installer. `goempy` rides on -top of the same releases. - -`python/generate/main.go` downloads the `*-pgo+lto-full.tar.zst` (or -`*-pgo-full.tar.zst` on Windows — PBS does not do LTO on MSVC builds) -for each (`goos`, `goarch`) in the matrix. A `--only-platforms` flag is -available for local development. +(PBS). That project produces fully relocatable, statically-linked, +PGO+LTO CPython builds with pinned OpenSSL, sqlite, libexpat, ncurses, +and so on. Portable CPython is harder than it sounds and they are the +people who have actually solved it. + +PBS was originally Gregory Szorc's project under +`indygreg/python-build-standalone`, built to support +[PyOxidizer](https://github.com/indygreg/PyOxidizer). Astral, the team +behind `uv` and `ruff`, took over maintenance in early 2024. It now +underpins `uv python install` and is one of the load-bearing pieces of +the modern Python packaging stack. + +`goempy` pulls the `*-pgo+lto-full.tar.zst` build on Unix and the +`*-pgo-full.tar.zst` build on Windows (no LTO on MSVC). For local +development the generator accepts a `--only-platforms=darwin/arm64` +flag so you can iterate without downloading all five builds. ## Supported platforms -| GOOS | GOARCH | PBS triple | LTO | Notes | -|---------|--------|-------------------------------------|-----|------------------------------------------| -| linux | amd64 | `x86_64-unknown-linux-gnu` | yes | glibc ≥ 2.17 (manylinux_2_17) | -| linux | arm64 | `aarch64-unknown-linux-gnu` | yes | upgraded to pgo+lto in PBS 20260414 | -| darwin | amd64 | `x86_64-apple-darwin` | yes | macOS 11+ (x86_64) | -| darwin | arm64 | `aarch64-apple-darwin` | yes | macOS 11+ (Apple Silicon) | -| windows | amd64 | `x86_64-pc-windows-msvc` | no | non-shared libpython; `.exe` entry point | +| GOOS | GOARCH | PBS triple | LTO | Notes | +|---------|--------|------------------------------|-----|------------------------------------| +| linux | amd64 | `x86_64-unknown-linux-gnu` | yes | glibc ≥ 2.17 (manylinux_2_17) | +| linux | arm64 | `aarch64-unknown-linux-gnu` | yes | PGO+LTO since PBS 20260414 | +| darwin | amd64 | `x86_64-apple-darwin` | yes | macOS 11+ | +| darwin | arm64 | `aarch64-apple-darwin` | yes | macOS 11+ on Apple Silicon | +| windows | amd64 | `x86_64-pc-windows-msvc` | no | non-shared libpython; `.exe` entry | -Not yet wired up (PBS has the artifacts, just not in this fork's matrix): -`windows/arm64`, `linux/musl-{amd64,arm64}`, Linux micro-arch variants -(`x86_64_v{2,3,4}`), Android, Emscripten, and the free-threaded (PEP 703) -builds. See [Roadmap](#roadmap). +Not yet wired up: `windows/arm64`, `linux/musl` (amd64 and arm64), +`x86_64_v{2,3,4}` micro-arch Linux builds, Android, Emscripten, and +the free-threaded (PEP 703) variants. PBS has all of those; the +matrix in `release.yml` just does not enumerate them yet. ## Supported Python versions -Each release tag is a `(python, pbs)` pair. As of the first `goempy` -release (April 2026), PBS 20260414 ships: +Each tag is one `(python, pbs)` pair. As of the first `goempy` release +in April 2026, PBS 20260414 ships: -| Python | Status | Notes | -|---------|----------|---------------------------------------------------------| -| 3.14.4 | primary | PEP 703 GIL-disable available via free-threaded builds | -| 3.13.13 | stable | | -| 3.12.13 | stable | | -| 3.11.15 | stable | | -| 3.10.20 | sunsetting | CPython upstream EOL October 2026 | +| Python | Status | Notes | +|---------|------------|------------------------------------------------------| +| 3.14.4 | primary | free-threaded builds available but not yet packaged | +| 3.13.13 | stable | | +| 3.12.13 | stable | | +| 3.11.15 | stable | | +| 3.10.20 | sunsetting | CPython upstream EOL in October 2026 | -All of them build from a single `release.yml` matrix. +One `release.yml` matrix run produces all of them. -## Release tags - -Tag format: `v0.0.0---`. Example: +## Release tag scheme ``` v0.0.0-3.14.4-20260414-1 - │ │ └─ build number (for re-releases of the same pair) + │ │ └─ build number (re-runs of the same pair) │ └────────── python-build-standalone release date tag └───────────────── CPython version ``` -The leading `v0.0.0` is intentional — this library does not follow semver +The `v0.0.0` prefix is deliberate. This library does not follow semver, and probably never will. The meaningful identifier is the -`-` suffix. `go get` against a specific tag to pin. Dependabot -and similar tools may mis-resolve upgrades against this scheme; review -Python version bumps manually. +`-` suffix. Pin exactly that with `go get`. + +Dependabot and similar tools tend to mis-resolve upgrades against this +scheme. Bump Python versions by hand. ## Embedding pip packages -The same machinery that packs CPython also packs pip-installed -dependencies into a per-platform `embed.FS`. Inside your repo: +The same packer that produces the interpreter also packs pip-installed +wheels into a per-platform `embed.FS`. In your repo: ```go // internal/mylib/generate/main.go @@ -267,16 +306,17 @@ package main import "github.com/tamnd/goempy/pip" func main() { - if err := pip.CreateEmbeddedPipPackagesForKnownPlatforms( + err := pip.CreateEmbeddedPipPackagesForKnownPlatforms( "requirements.txt", "./data/", - ); err != nil { + ) + if err != nil { panic(err) } } ``` -``` +```go // internal/mylib/dummy.go package mylib @@ -290,131 +330,142 @@ jinja2==3.1.4 Then `go generate ./internal/mylib/...`. The generator downloads wheels for every `(goos, goarch)` in the matrix using -`pip install --platform … --only-binary=:all:`, packs each target directory -the same way the interpreter is packed, and emits a `data.Data` `embed.FS` -per platform. At runtime: +`pip install --platform … --only-binary=:all:` and writes each target +directory out using the same packing logic. At runtime: ```go libs, _ := embed_util.NewEmbeddedFiles(data.Data, "mylib-pip") ep.AddPythonPath(libs.GetExtractedPath()) + cmd, _ := ep.PythonCmd("-c", "import jinja2; print(jinja2.__version__)") ``` -A working example is the [`example/`](./example) directory in this repo, -and a much more complete one lives in -[`kluctl/go-jinja2`](https://github.com/kluctl/go-jinja2) (which is where -this pattern was originally battle-tested). +The [`example/`](./example) directory in this repo has a small working +version. [`kluctl/go-jinja2`](https://github.com/kluctl/go-jinja2) has +a much fuller one, and that was where the pattern was first +stress-tested in production. ## Roadmap -Things I want to do, in roughly decreasing priority: - -1. **Fix the manifest integrity check.** Current "unchanged" fast path - compares only `Size()`. Across a 3.13 → 3.14 upgrade, same-named - stdlib files can have identical sizes and stale bytes on disk survive - the check. Use the per-entry content hash that already exists in - `files.json`. -2. **Windows path normalization.** When the packer runs on Windows, - `filepath.Separator` leaks into `files.json` entries as `\`. Force - `/` everywhere in the manifest. (Carries intent of upstream PR #50.) -3. **Per-file zstd** instead of gzip. Pure-Go decoder via - `klauspost/compress/zstd`; saves 15–30 % of embedded bytes. -4. **`windows/arm64`, `linux/musl-{amd64,arm64}`** in the matrix. -5. **Free-threaded (PEP 703) variants** as an opt-in matrix axis. PBS - already ships `*-freethreaded+pgo-full.tar.zst` everywhere that - matters. -6. **Lazy extract**: a single compressed blob + streaming extract on - first `PythonCmd` call. Optional mode — the per-file layout stays - default for the fast-skip path. -7. **Android / iOS triples**. PBS 20260414 has Android, and 3.14 makes - it tier-3. +Rough order of what I want to tackle next: + +1. Fix the manifest integrity check. The runtime "unchanged" path + compares only `Size()`, which can return a false positive across a + Python upgrade if a stdlib file happens to keep the same byte + count. The manifest already carries a content hash; use it. +2. Normalize path separators in the manifest when the packer runs on + Windows. Currently `filepath.Separator` leaks into `files.json` + as `\`, which breaks the per-entry lookup at runtime. +3. Switch per-file compression from gzip to zstd. Pure-Go decoder + via `klauspost/compress/zstd`, already a transitive dependency. +4. Add `windows/arm64` and `linux/musl` (amd64 and arm64) to the + matrix. PBS ships them. +5. Opt-in free-threaded (PEP 703) builds as a matrix axis. +6. A lazy extract mode: ship a single compressed blob and stream-extract + on first `PythonCmd` call. Would be an option, not the default; the + per-file layout is worth keeping for the skip-on-unchanged fast path. +7. Android and iOS triples. PBS 20260414 has Android, and Python 3.14 + promoted it to tier-3. Contributions welcome. ## Non-goals -- **In-process Python via CGo.** Explicitly out of scope. If you want - that, use `go-python/cpy3` or PyOxidizer and make peace with libpython. -- **Python → Go bindings.** Use - [`go-python/gopy`](https://github.com/go-python/gopy) for that. -- **A general-purpose `embed.FS` compressor.** The `embed_util` package - is not trying to be one, even though it could look like one. It is - tuned for (large, many-file, partially-symlinked Python trees). - -## Credits +In-process Python via CGo is explicitly out of scope. If that is what +you want, go with `go-python/cpy3` or PyOxidizer and make peace with +shipping libpython. -### Upstream authors +Python-calls-Go or Go-calls-Python bindings are also out of scope. +[`go-python/gopy`](https://github.com/go-python/gopy) is the project +for that, and it composes fine with this one. -All of the design and the vast majority of the code in this repository -come from the original -[`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python), -authored by Alexander Block ([@codablock](https://github.com/codablock)) -and contributors in the Kluctl organization. The project was extracted -from [`kluctl/kluctl`](https://github.com/kluctl/kluctl), a Kubernetes -GitOps tool, where it was used to ship Jinja2 templating without -depending on a system Python. If this library helps you, star the -[upstream repo](https://github.com/kluctl/go-embed-python) first. +A general-purpose `embed.FS` compressor is not something I want +`embed_util` to become, even though it could look like one. The +package is tuned for the particular shape of a CPython install tree: +many small files, some symlinks, no hardlinks, predictable layout. -This fork adds: - -- Python 3.14.4 + python-build-standalone `20260414` -- Fix for the Windows PBS triple rename (`pc-windows-msvc-shared-pgo-full` - → `pc-windows-msvc-pgo-full`) that silently broke the 3.14 download path -- Upgrade of `linux/arm64` to PGO+LTO (PBS now ships it) -- Go toolchain 1.19 → 1.24, `log/slog` in place of `sirupsen/logrus` -- pip 24.3.1 → 25.2, with explicit `setuptools>=75` / `wheel>=0.45` pins -- `--only-platforms` on the generator for scoped local builds -- Docs rewritten +## Credits -Individual file headers retain their original attribution and Apache-2.0 -licensing. Nothing in this fork is relicensed. +### Upstream: kluctl/go-embed-python + +The original work, and the overwhelming majority of the code in this +repository, is +[`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python). +It was written by Alexander Block +([@codablock](https://github.com/codablock)) and the Kluctl +contributors. The project was extracted from +[`kluctl/kluctl`](https://github.com/kluctl/kluctl), a Kubernetes GitOps +tool where it was originally used to embed Jinja2 templating without a +system Python dependency. + +If this library helps you, please go star the upstream repository. +Everything clever here is theirs. + +This fork adds, relative to the last upstream release +(`v0.0.0-3.13.1-20241219-1`): + +- Python 3.14.4 and python-build-standalone 20260414. +- Fix for the Windows PBS triple rename from + `pc-windows-msvc-shared-pgo-full` to `pc-windows-msvc-pgo-full`. + Without it, the 3.14 download silently 404'd on Windows. +- Upgrade of `linux/arm64` to PGO+LTO. PBS used to ship only + `lto-full` for aarch64 Linux and now ships `pgo+lto-full`. +- Go toolchain 1.19 to 1.24. `sirupsen/logrus` replaced by the + standard library's `log/slog`. +- pip 24.3.1 to 25.2, with `setuptools>=75` and `wheel>=0.45` pinned + explicitly so that `get-pip.py` cannot drift to older versions. +- A `--only-platforms` flag on `python/generate` so you can build + one platform locally instead of all five. +- Docs rewrite. + +Individual file headers keep their original attribution. The project +stays Apache-2.0. Nothing is relicensed. ### python-build-standalone -The Python distributions themselves are -[`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone), -originally +The CPython builds we redistribute come from +[`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone). +Before Astral took over, the project was [`indygreg/python-build-standalone`](https://github.com/indygreg/python-build-standalone) -by Gregory Szorc. Astral took maintenance over in 2024 and it is now one -of the two or three most important projects in the Python packaging -ecosystem. Everything in this fork rests on their work. +and was maintained by Gregory Szorc. Portable CPython is a hard +problem and this project is one of the quiet load-bearing pieces of +the Python packaging ecosystem. ## Related projects Things you might want instead, or alongside: -- **[kluctl/go-embed-python](https://github.com/kluctl/go-embed-python)** - — upstream. Use this if you are on Python ≤ 3.13 and the last upstream - release meets your needs. -- **[kluctl/go-jinja2](https://github.com/kluctl/go-jinja2)** — Jinja2 +- [`kluctl/go-embed-python`](https://github.com/kluctl/go-embed-python). + Upstream. If you are on Python 3.13 or earlier and the last upstream + release meets your needs, use this. +- [`kluctl/go-jinja2`](https://github.com/kluctl/go-jinja2). Jinja2 templating for Go, implemented by driving an embedded Python - subprocess. Best real-world consumer of `go-embed-python` and a useful - reference for how to wire pip-embedded libraries into it. -- **[kluctl/kluctl](https://github.com/kluctl/kluctl)** — the GitOps - tool where this code originated. -- **[astral-sh/python-build-standalone](https://github.com/astral-sh/python-build-standalone)** - — the portable CPython builds we redistribute. Read their release + subprocess. The best worked example of how to wire a pip-packed + `embed.FS` into an `EmbeddedPython`. +- [`kluctl/kluctl`](https://github.com/kluctl/kluctl). Kubernetes + GitOps tool. Where this code originally came from. +- [`astral-sh/python-build-standalone`](https://github.com/astral-sh/python-build-standalone). + The portable CPython builds we redistribute. Read their release notes before upgrading. -- **[astral-sh/uv](https://github.com/astral-sh/uv)** — a Rust-based - Python package / project manager that also consumes PBS. Conceptual - sibling: if you are a Rust shop, `uv` does for Rust binaries what - `goempy` does for Go binaries (minus the CGo-free subprocess model). -- **[indygreg/PyOxidizer](https://github.com/indygreg/PyOxidizer)** and - **[pyembed](https://github.com/indygreg/PyOxidizer/tree/main/pyembed)** - — the other approach: in-process CPython linked into a single binary. - Largely unmaintained at this point. -- **[go-python/gopy](https://github.com/go-python/gopy)** — generate Go - bindings for Python packages. Different problem; they complement each - other. -- **[go-python/cpy3](https://github.com/go-python/cpy3)** — direct CGo - bindings to libpython. What you use if you really want to call Python - in-process and do not care about deployment pain. -- **[cibuildwheel](https://github.com/pypa/cibuildwheel)** — worth a - read for the cross-platform CI matrix patterns we imitate. +- [`astral-sh/uv`](https://github.com/astral-sh/uv). Rust-based Python + package and project manager. Also consumes PBS under the hood. If + you are reaching for Python from Rust, `uv` does roughly what + `goempy` does from Go, minus the subprocess model. +- [`indygreg/PyOxidizer`](https://github.com/indygreg/PyOxidizer) and + its [`pyembed`](https://github.com/indygreg/PyOxidizer/tree/main/pyembed) + crate. The other approach: in-process CPython linked into a single + binary. Mostly unmaintained now. +- [`go-python/gopy`](https://github.com/go-python/gopy). Generator for + Go bindings to Python packages. Different problem; complementary. +- [`go-python/cpy3`](https://github.com/go-python/cpy3). Direct CGo + bindings to libpython. What you use if you truly want in-process + Python and can live with the deployment pain. +- [`pypa/cibuildwheel`](https://github.com/pypa/cibuildwheel). Useful + reading for the cross-platform CI matrix patterns we imitate. ## License -Apache-2.0, the same as upstream. See [`LICENSE`](./LICENSE). +Apache-2.0, same as upstream. See [`LICENSE`](./LICENSE). Original copyright: Kluctl contributors. Fork maintenance: Duc-Tam Nguyen <tamnd@liteio.dev>. From f683f92bc0da6d4e2bd5b82d94c9f84d1fa50e4f Mon Sep 17 00:00:00 2001 From: Duc-Tam Nguyen <1218621+tamnd@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:33:55 +0700 Subject: [PATCH 5/5] release: Switch to Go-idiomatic tag scheme The old v0.0.0--- format was inherited from upstream but it confuses `go get` and Dependabot, and the v0.0.0 prefix signals "this is not a real module" when it actually is one. Replace it with a normal semver line: v0.1.0 primary Python (3.14) v0.1.0-py3.13.13 secondary Python lines, valid semver prereleases v0.1.0-py3.12.13 v0.1.0-py3.11.15 v0.1.0-py3.10.20 `go get @latest` now resolves to the primary tag. Other lines are pinned explicitly with the full tag. Library version lives in VERSION, primary Python in PRIMARY_PYTHON. hack/tag-name.sh computes the right tag; hack/build-tag.sh writes the chosen tag to a `tag-name` artifact that release.yml reads back in the release job. next-build-num.sh is gone. --- .github/workflows/release.yml | 10 ++++------ PRIMARY_PYTHON | 1 + README.md | 33 +++++++++++++++++++-------------- VERSION | 1 + hack/build-tag.sh | 16 ++++++---------- hack/next-build-num.sh | 35 ----------------------------------- hack/tag-name.sh | 33 +++++++++++++++++++++++++++++++++ 7 files changed, 64 insertions(+), 65 deletions(-) create mode 100644 PRIMARY_PYTHON create mode 100644 VERSION delete mode 100755 hack/next-build-num.sh create mode 100755 hack/tag-name.sh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5e417881..b7a75e5e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -67,9 +67,7 @@ jobs: run: | git config --global user.email "no@mail.exists" git config --global user.name "goempy releaser" - BUILD_NUM=$(./hack/next-build-num.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }}) - ./hack/build-tag.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }} $BUILD_NUM - echo $BUILD_NUM > build-num + ./hack/build-tag.sh ${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }} - name: git gc run: | git gc @@ -81,7 +79,7 @@ jobs: name: workdir-${{ matrix.pythonStandaloneVersion }} ${{ matrix.pythonVersion }} path: | git-dir - build-num + tag-name tests: needs: @@ -108,7 +106,7 @@ jobs: shell: bash run: | git reset --hard - git checkout v0.0.0-${{ matrix.pythonVersion }}-${{ matrix.pythonStandaloneVersion }}-$(cat build-num) + git checkout "$(cat tag-name)" - name: Set up Go uses: actions/setup-go@v5 with: @@ -146,4 +144,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: push tag run: | - git push origin v0.0.0-${{ matrix.pythonVersion }}-${{ matrix.pythonStandaloneVersion }}-$(cat build-num) + git push origin "$(cat tag-name)" diff --git a/PRIMARY_PYTHON b/PRIMARY_PYTHON new file mode 100644 index 00000000..6324d401 --- /dev/null +++ b/PRIMARY_PYTHON @@ -0,0 +1 @@ +3.14 diff --git a/README.md b/README.md index 366364c4..00ddf9f4 100644 --- a/README.md +++ b/README.md @@ -114,8 +114,10 @@ each target directory using the same `embed_util.CopyForEmbed` helper. The release workflow finishes by committing `python/internal/data/` and `pip/internal/data/` to a detached branch -and tagging it as `v0.0.0---`. The main branch stays -slim because the binary data never lives on it. +and tagging it. The primary Python line (currently 3.14) gets a clean +`v` tag, so `go get @latest` resolves to it. Secondary lines get +a semver prerelease suffix like `v-py3.13.13`. The main branch +stays slim because the binary data never lives on it. ### Build time @@ -280,19 +282,22 @@ One `release.yml` matrix run produces all of them. ## Release tag scheme -``` -v0.0.0-3.14.4-20260414-1 - │ │ └─ build number (re-runs of the same pair) - │ └────────── python-build-standalone release date tag - └───────────────── CPython version -``` +Each release produces one tag per supported Python line. The primary +line (3.14 right now) gets a clean semver tag. The other lines get +valid semver prereleases so that `go get @latest` always lands on +the primary: -The `v0.0.0` prefix is deliberate. This library does not follow semver, -and probably never will. The meaningful identifier is the -`-` suffix. Pin exactly that with `go get`. +| CPython | Tag | How to pin | +|---------|----------------------|---------------------------------------------------| +| 3.14.4 | `v0.1.0` | `go get github.com/tamnd/goempy@latest` | +| 3.13.13 | `v0.1.0-py3.13.13` | `go get github.com/tamnd/goempy@v0.1.0-py3.13.13` | +| 3.12.13 | `v0.1.0-py3.12.13` | `go get github.com/tamnd/goempy@v0.1.0-py3.12.13` | +| 3.11.15 | `v0.1.0-py3.11.15` | `go get github.com/tamnd/goempy@v0.1.0-py3.11.15` | +| 3.10.20 | `v0.1.0-py3.10.20` | `go get github.com/tamnd/goempy@v0.1.0-py3.10.20` | -Dependabot and similar tools tend to mis-resolve upgrades against this -scheme. Bump Python versions by hand. +The library version itself lives in the `VERSION` file at the repo +root; the primary Python line lives in `PRIMARY_PYTHON`. Bumping +either bumps every tag in the next release. ## Embedding pip packages @@ -402,7 +407,7 @@ If this library helps you, please go star the upstream repository. Everything clever here is theirs. This fork adds, relative to the last upstream release -(`v0.0.0-3.13.1-20241219-1`): +(`v0.0.0-3.13.1-20241219-1` in the old scheme): - Python 3.14.4 and python-build-standalone 20260414. - Fix for the Windows PBS triple rename from diff --git a/VERSION b/VERSION new file mode 100644 index 00000000..b82608c0 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +v0.1.0 diff --git a/hack/build-tag.sh b/hack/build-tag.sh index 2c8cca09..ad88d422 100755 --- a/hack/build-tag.sh +++ b/hack/build-tag.sh @@ -7,7 +7,6 @@ cd $DIR/.. PYTHON_STANDALONE_VERSION=$1 PYTHON_VERSION=$2 -BUILD_NUM=$3 if [ "$PYTHON_STANDALONE_VERSION" = "" ]; then echo "missing python-standalone version" @@ -19,25 +18,22 @@ if [ "$PYTHON_VERSION" = "" ]; then exit 1 fi -if [ "$BUILD_NUM" = "" ]; then - echo "missing build num" - exit 1 -fi - if [ ! -z "$(git status --porcelain)" ]; then echo "working directory is dirty!" exit 1 fi +TAG=$("$DIR/tag-name.sh" "$PYTHON_VERSION") + go run ./python/generate --python-standalone-version=$PYTHON_STANDALONE_VERSION --python-version $PYTHON_VERSION go run ./pip/generate -TAG=v0.0.0-$PYTHON_VERSION-$PYTHON_STANDALONE_VERSION-$BUILD_NUM - -echo "checking out temporary branch" +echo "tagging as $TAG (python $PYTHON_VERSION, python-build-standalone $PYTHON_STANDALONE_VERSION)" git checkout --detach git add -f python/internal/data git add -f pip/internal/data -git commit -m "added python $PYTHON_VERSION from python-standalone $PYTHON_STANDALONE_VERSION" +git commit -m "python $PYTHON_VERSION + python-build-standalone $PYTHON_STANDALONE_VERSION ($TAG)" git tag -f $TAG git checkout - + +echo "$TAG" > tag-name diff --git a/hack/next-build-num.sh b/hack/next-build-num.sh deleted file mode 100755 index 10711077..00000000 --- a/hack/next-build-num.sh +++ /dev/null @@ -1,35 +0,0 @@ -set -e - -DIR=$(cd $(dirname $0) && pwd) -cd $DIR/.. - -PYTHON_STANDALONE_VERSION=$1 -PYTHON_VERSION=$2 - -if [ "$PYTHON_STANDALONE_VERSION" = "" ]; then - echo "missing python-standalone version" - exit 1 -fi - -if [ "$PYTHON_VERSION" = "" ]; then - echo "missing python version" - exit 1 -fi - -REMOTE_TAGS=$(git ls-remote) -LOCAL_TAGS=$(git tag) -#echo REMOTE_TAGS=$REMOTE_TAGS -#echo LOCAL_TAGS=$LOCAL_TAGS - -BUILD_NUM=1 - -while true; do - TAG=v0.0.0-$PYTHON_VERSION-$PYTHON_STANDALONE_VERSION-$BUILD_NUM - if [ "$(echo $REMOTE_TAGS | grep "refs/tags/$TAG")" != "" -o "$(echo $LOCAL_TAGS | grep "$TAG")" != "" ] ; then - BUILD_NUM=$(($BUILD_NUM+1)) - else - break - fi -done - -echo $BUILD_NUM diff --git a/hack/tag-name.sh b/hack/tag-name.sh new file mode 100755 index 00000000..87ec14cb --- /dev/null +++ b/hack/tag-name.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# +# Compute the release tag for a given Python patch version. +# +# v for the primary Python line (VERSION + PRIMARY_PYTHON) +# v-py for every other Python line +# +# The primary tag has no prerelease suffix so `go get @latest` resolves to it. +# Non-primary tags are valid semver prereleases of the primary tag, so users +# pin them explicitly. + +set -e + +DIR=$(cd $(dirname $0) && pwd) +cd $DIR/.. + +PYTHON_VERSION=$1 +if [ -z "$PYTHON_VERSION" ]; then + echo "missing python version" >&2 + exit 1 +fi + +LIB_VERSION=$(tr -d ' \t\r\n' < VERSION) +PRIMARY_PYMM=$(tr -d ' \t\r\n' < PRIMARY_PYTHON 2>/dev/null || true) +: ${PRIMARY_PYMM:=3.14} + +PYMM=$(echo "$PYTHON_VERSION" | cut -d. -f1-2) + +if [ "$PYMM" = "$PRIMARY_PYMM" ]; then + printf "%s\n" "$LIB_VERSION" +else + printf "%s-py%s\n" "$LIB_VERSION" "$PYTHON_VERSION" +fi