Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ if [ "$SEDONA_VERSION" = "latest" ]; then

# The compilation must take place outside Docker to avoid unnecessary maven packages
mvn clean install -DskipTests -Dspark="${SEDONA_SPARK_VERSION}" -Dscala=2.13
else
# When building against a published Sedona version, install-sedona.sh
# downloads the shaded JAR from Maven Central inside the container and
# never reads spark-shaded/target/. Any stale Maven artifacts in the
# local tree would still be sent with the build context, baked into an
# image layer by the `COPY ./spark-shaded/` step, shipped in the
# published manifest, and added to every pull's download size. The
# dockerfile's trailing `RUN rm -rf ${SEDONA_HOME}` only deletes them
# from the runtime filesystem; image layers are additive, so the earlier
# COPY layer keeps its full size. The same reasoning covers stale
# python/build and python/dist output, which the command below also
# removes.
# apache/sedona:1.9.0 hit this regression and shipped 1.1 GB heavier than
# 1.8.1 (4.03 GB vs 2.97 GB compressed) for exactly this reason.
rm -rf spark-shaded/target python/build python/dist
fi
Comment on lines +84 to 95

# -- Building the image
Expand Down
4 changes: 2 additions & 2 deletions docker/sedona-docker.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ ENV PYTHONPATH=${SPARK_HOME}/python
# Set up OS libraries and PySpark
RUN apt-get update
RUN apt-get install -y openjdk-17-jdk-headless curl python3-pip maven
RUN pip3 install pipenv --break-system-packages
RUN pip3 install --no-cache-dir pipenv --break-system-packages
COPY ./docker/install-spark.sh ${SEDONA_HOME}/docker/
RUN chmod +x ${SEDONA_HOME}/docker/install-spark.sh
RUN ${SEDONA_HOME}/docker/install-spark.sh ${spark_version} ${hadoop_s3_version} ${aws_sdk_version}

# Install Python dependencies
COPY docker/requirements.txt /opt/requirements.txt
RUN pip3 install -r /opt/requirements.txt --break-system-packages
RUN pip3 install --no-cache-dir -r /opt/requirements.txt --break-system-packages
Comment on lines +46 to +53


# Copy local compiled jars and python code to the docker environment
Expand Down
13 changes: 13 additions & 0 deletions docker/sedona-docker.dockerfile.dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,16 @@
!docs/usecases/**
!python/**
!spark-shaded/**

# Re-exclude Python build artifacts and pyc caches so a tree that has
# had `python -m build` run against it does not balloon the COPY layers.
# Note: `**/target/` is intentionally NOT excluded here because the
# build.sh `latest` mode runs `mvn clean install -DskipTests` and then
# install-sedona.sh inside the image copies the freshly-built shaded
# JAR from `${SEDONA_HOME}/spark-shaded/target/`. For the non-latest
# branch, build.sh instead deletes the stale `spark-shaded/target/`
# directory left by a prior local build before the image is built —
# see docker/build.sh.
python/build/
python/dist/
**/*.egg-info/
**/__pycache__/
Comment on lines +13 to +22
Loading