Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
147 commits
Select commit Hold shift + click to select a range
ce29f69
docs: add logprobs to openapi (#13090)
ParthSareen Nov 14, 2025
72ff5b9
log: warn if user overrides detected (#13088)
dhiltgen Nov 14, 2025
d7fd721
tests: basic benchmarking test framework (#12964)
pdevine Nov 16, 2025
5d31242
discover: fix typos in runner.go (#13096)
lnicola Nov 16, 2025
a751bc1
llama: test case typo and readability improvements (#13078)
vignesh1507 Nov 16, 2025
4cea757
server: clean up manifest documentation (#12995)
pierwill Nov 16, 2025
d564982
readme: add Kdeps to community integrations (#11877)
jjuliano Nov 16, 2025
dd0ed0e
docs: fix typos in repository documentation (#10683)
omahs Nov 16, 2025
aa676b3
docs: link to ollama.com instead of hardcoding list of cloud models (…
jmorganca Nov 17, 2025
231cc87
app/ui: fix to point ollama client to ui backend in dev mode (#13079)
hoyyeva Nov 17, 2025
399eacf
ci: fix missing vulkan binaries in linux bundles (#13123)
dhiltgen Nov 17, 2025
2f36d76
bring back sysfs based VRAM information for AMD (#12871)
dhiltgen Nov 17, 2025
4aba2e8
discover: Support cgroups cores and memory limitations (#10292)
SiLeader Nov 18, 2025
1fd4cb8
app/cmd: restrict ollama:// URL scheme to supported paths (#13120)
hoyyeva Nov 18, 2025
584e2d6
Add deepseek v3.1 (#13063)
gr4ceG Nov 18, 2025
330f62a
docs: add Void Editor to community integrations (#13124)
ssam18 Nov 18, 2025
718961d
migrate to golangci-lint v2 (#13109)
mxyng Nov 18, 2025
440a382
fix(tokenizer): add special tokens to empty inputs (#13091)
mxyng Nov 18, 2025
8ed1adf
docs: fix typo in vscode.mdx (#13116)
seolyam Nov 18, 2025
92981ae
deepseekocr
mxyng Nov 1, 2025
0796d79
cuda: skip large batches
mxyng Nov 18, 2025
485da9f
win: exit instead of abort (#13138)
dhiltgen Nov 19, 2025
8de30b5
nomic-embed-text model implementation (#13071)
npardal Nov 19, 2025
9193563
Renderer for Cogito v2 (#13139)
gr4ceG Nov 19, 2025
b6e02cb
ggml: Automatically make tensors contiguous on reshape
jessegross Nov 19, 2025
53985b3
kvcache: Use SetRows to store cache data
jessegross Aug 18, 2025
604e43b
models: enable deepseek2 (deepseek v3.1 w/ MLA) on the new engine (#1…
pdevine Nov 19, 2025
eac5b8b
chore: mark vulkan shaders as vendored files
mxyng Nov 19, 2025
b2af509
nomic-embed: nomic-embed-text defaulted to ollama runner (#13144)
npardal Nov 19, 2025
cb485b2
kvcache: Run tests both with and without PermutedV
jessegross Nov 19, 2025
5c1063d
deepseek2: upgrade to run v3+ models (#13166)
mxyng Nov 20, 2025
d70e935
Parser for Cogito v2 (#13145)
gr4ceG Nov 20, 2025
dba62ff
discovery: fix cuda overlap case (#13176)
dhiltgen Nov 20, 2025
417a81f
app: open app instead of always navigating to / on connect (#13164)
jmorganca Nov 20, 2025
47e272c
app/cmd: update ollama help to navigate to ollama doc instead of gith…
hoyyeva Nov 20, 2025
efdd9b7
gguf: add split gguf loading
cvrunmin Nov 20, 2025
8b1b89a
docs: remove deprecated parameters (#13237)
EntropyYue Nov 26, 2025
fb7c898
ggml: fix cannot read split info
cvrunmin Nov 26, 2025
5f32d76
server: sort baselayers by split.no in create route
cvrunmin Nov 26, 2025
a12cabb
ggml: fix wrong param size of metaggml
cvrunmin Nov 27, 2025
b2ebfcc
server: get correct info from split ggufs while creating models
cvrunmin Nov 27, 2025
10dc89f
server: sanity check when creating model with split gguf
cvrunmin Nov 28, 2025
3e353c9
docs: add infos for split gguf
cvrunmin Nov 28, 2025
fa9f1ee
Merge branch 'main' into feat/split-gguf
cvrunmin Nov 28, 2025
469ac5b
server: more sanity check when loading split gguf
cvrunmin Nov 28, 2025
0c24896
docs: fix output formatting in faq.mdx (#13231)
kokes Nov 29, 2025
6d9f932
.gitattributes: add app/webview to linguist-vendored (#13274)
jmorganca Nov 30, 2025
467bbc0
jetpack: require exact match or skip cuda_jetpack* (#13288)
dhiltgen Dec 1, 2025
5b6a8e6
api/client: handle non-json streaming errors (#13007)
BruceMacD Dec 1, 2025
5541727
win: warn if ggml-base detected in PATH (#13289)
dhiltgen Dec 1, 2025
d3e0a0d
model: ministral w/ llama4 scaling (#13292)
pdevine Dec 2, 2025
f8f1071
CUDA: verify CC is supported by target library (#13298)
dhiltgen Dec 2, 2025
d771043
test: add ministral-3 (#13300)
dhiltgen Dec 2, 2025
5317202
llm: Don't always evict models on CPU-only systems
jessegross Nov 25, 2025
18b5958
test: avoid ministral tools test on low vram (#13302)
dhiltgen Dec 2, 2025
20aee96
Add Vulkan GPU support instructions in development.md (#13265)
chengcheng84 Dec 2, 2025
cc9555a
Update user message format for temperature query (#13256)
nathan-hook Dec 2, 2025
5554b60
Merge remote-tracking branch 'original/main' into rpc_clean
Gulianrdgd Dec 3, 2025
355d5ba
Updated and fixed again
Gulianrdgd Dec 3, 2025
3f30836
CUDA: filter devices on secondary discovery (#13317)
dhiltgen Dec 3, 2025
84a2ced
app: relay thinking false to server (#13319)
BruceMacD Dec 3, 2025
854d40e
ci: restore previous linter rules (#13322)
jmorganca Dec 4, 2025
0cf7794
ggml update to b7108 (#12992)
dhiltgen Dec 4, 2025
a03223b
cmd/bench: support writing benchmark output to file (#13263)
Eloitor Dec 4, 2025
0a844f8
convert: add deepseek converter (#12980)
pdevine Dec 4, 2025
7837a5b
ggml: Always set cache padding to 256
jessegross Dec 4, 2025
1108d8b
ggml: Enable flash attention for vision encoders
jessegross Dec 2, 2025
9191dfa
llm: Enable flash attention for mistral3 by default
jessegross Dec 4, 2025
31b8c6a
fix(api): correct Content-Type header for /api/chat and /api/generate…
ZeeeUs Dec 5, 2025
c146a13
ggml: handle all streams (#13350)
dhiltgen Dec 6, 2025
5a41d69
fs/ggml: write int32 and int64 values to gguf files (#13335)
jmorganca Dec 8, 2025
0c78723
readme: fix broken Swollama link in community integrations (#13370)
computerscienceiscool Dec 8, 2025
44e6a78
Merge branch 'main' into feat/split-gguf
cvrunmin Dec 8, 2025
5dae738
CI: use vendor base commit in cache keys (#13348)
dhiltgen Dec 8, 2025
e082d60
truncation: fixed runner truncation logic + removed server truncation…
npardal Dec 8, 2025
603ceef
refactor rope
mxyng Nov 18, 2025
d2f334c
model: add rnj-1 inference support (#13354)
jmorganca Dec 9, 2025
d475d1f
fix: qwen2.5vl metal argsort
mxyng Dec 8, 2025
0c5e5f6
parsers/renderers: olmo3 think (#13290)
ParthSareen Dec 9, 2025
2bccf8c
renderers/parsers: olmo3 instruct (#13383)
ParthSareen Dec 9, 2025
76f88ca
nomic-embed-text:v2: model implementation (#13162)
npardal Dec 9, 2025
bbbb6b2
app/ui: fix model capabilities not updating after download completion…
hoyyeva Dec 10, 2025
7cf6f18
app/ui: refactor to use Ollama endpoints for user auth and health che…
hoyyeva Dec 10, 2025
c34fc64
app/ui: use requestAnimationFrame to prevent bottom line cutoff in st…
hoyyeva Dec 10, 2025
b956930
feat: llama.cpp bump (17f7f4) for SSM performance improvements (#13408)
gabe-l-hart Dec 10, 2025
56b8fb0
cmd/bench: fix options table in cmd/bench/README.md (#13216)
pythongirl325 Dec 10, 2025
dac4f17
cmd/bench: fix binary name in README (#13276)
Eloitor Dec 10, 2025
1c4e85b
routes: add logprobs in tool calls (#13238)
Eason023 Dec 11, 2025
a838421
model: conversion and hyperparameter fixes for ministral and devstral…
jmorganca Dec 11, 2025
48e78e9
template: add yesterdayDate helper function (#13431)
jmorganca Dec 11, 2025
3475d91
embeddings: modified batch size (#13429)
npardal Dec 11, 2025
1eb5e75
openai: add v1/responses support (#13351)
drifkin Dec 11, 2025
2dfb744
model: fix rotary embeddings for ministral 3 (#13432)
jmorganca Dec 12, 2025
709f842
Update README.md (#13373)
computerscienceiscool Dec 12, 2025
93d45d7
docs: fix link to modelfile.mdx (#13220)
familom Dec 12, 2025
9b2035d
openai: add tool call appending to previous assistant message (#13434)
ParthSareen Dec 12, 2025
9f78228
docs: add docs for v1/responses and rework openai compat section (#1…
drifkin Dec 12, 2025
95fdd8d
fix: select and update models folder in settings (#13412)
hoyyeva Dec 12, 2025
de9ecfd
tidy up lint warnings on windows (#13430)
hoyyeva Dec 12, 2025
7730895
Enable Ollama engine by default (#13443)
dhiltgen Dec 12, 2025
3af5d3b
model: force rope factor 1.0 for Gemma 3 (#13445)
jmorganca Dec 12, 2025
bd6c1d6
flash attn: add auto mode for llama engine (#13052)
dhiltgen Dec 12, 2025
1b308e1
model: fix global layer rope scale values for gemma 3 (#13452)
jmorganca Dec 13, 2025
4ff8a69
model: default gemma 3 rope scale to 1.0, apply corrections based on …
jmorganca Dec 13, 2025
c81b9ec
Merge branch 'main' into feat/split-gguf
cvrunmin Dec 15, 2025
abe67ac
Revert "Enable Ollama engine by default" (#13481)
dhiltgen Dec 15, 2025
8dbc9e7
app/ui: handle unspecified bind addresses and wait for server in olla…
hoyyeva Dec 15, 2025
e3731fb
renderers: add olmo3.1 and olmo3 fixes (#13447)
ParthSareen Dec 15, 2025
aacd1cb
fix: define GGML_VERSION variables for proper SOVERSION expansion (#1…
nathannewyen Dec 15, 2025
2c63943
DeepseekV3 family renderer (#13180)
gr4ceG Dec 15, 2025
ffbe8e0
model: add olmo3 and olmo3.1 (#13415)
ParthSareen Dec 15, 2025
971d625
fix: qwen2.5 vl rope (#13486)
mxyng Dec 16, 2025
7b95087
Adding tool definitions to DeepseekV3 renderer (#13491)
gr4ceG Dec 16, 2025
7e3ea81
llama/parsers/renderers: nemotron 3 nano (#13489)
ParthSareen Dec 16, 2025
89eb795
parsers/renderers: use think from user for nemotron (#13492)
ParthSareen Dec 16, 2025
903b1fc
use ollama engine for bert models (#13501)
mxyng Dec 16, 2025
2dd029d
remove unnecessary code (#13502)
mxyng Dec 16, 2025
45c4739
types: ConfigV2 and RootFS (#13504)
BruceMacD Dec 16, 2025
f6a016f
revert granite-embedding (#13505)
mxyng Dec 16, 2025
a013693
DeepseekV3 Family Parser (#13484)
gr4ceG Dec 17, 2025
1c09403
types: add nested property support for tool definitions (#13508)
ParthSareen Dec 17, 2025
49a9c9b
GGML update to ec98e2002 (#13451)
dhiltgen Dec 17, 2025
0fadeff
Omit args and params in tool function def and calls (#13516)
gr4ceG Dec 18, 2025
522c11a
Revert "Omit args and params in tool function def and calls (#13516)"…
gr4ceG Dec 18, 2025
7325791
parsers/renderers: functiongemma (#13521)
ParthSareen Dec 18, 2025
8852220
add REQUIRES command to Modelfile (#13361)
jmorganca Dec 18, 2025
3286751
Merge branch 'main' into feat/split-gguf
cvrunmin Dec 19, 2025
2a0ce29
Update for ROCm
Dec 19, 2025
9c185fb
Merge branch 'rpc_clean' of https://github.com/NOLAI/ollama into rpc_…
Dec 19, 2025
95d45a4
Merge remote-tracking branch 'original/main' into rpc_clean
Dec 19, 2025
172b592
llm: Avoid integer underflow on llama engine memory layout
jessegross Dec 19, 2025
7ad0369
amd: use GTT on iGPUs on linux (#13196)
dhiltgen Dec 23, 2025
18fdcc9
docs: fix broken .md links and render issues (#13550)
Vallabh-1504 Dec 23, 2025
f5f74e1
docs: add version note for /v1/responses API (#13596)
majiayu000 Jan 3, 2026
2e78653
app/ui: add swift syntax highlighting support (#13574)
majiayu000 Jan 3, 2026
e1bdc23
docs: fix tool name mismatch and trailing commas in api.md example (#…
nathannewyen Jan 3, 2026
37f6f3a
server: return error when embedding contains NaN or Inf values (#13599)
majiayu000 Jan 3, 2026
d087e46
docs/capabilities/vision: fix curl related code snippet (#13615)
harrykiselev Jan 3, 2026
e51dead
preserve tool definition and call JSON ordering (#13525)
drifkin Jan 6, 2026
6c3faaf
olmo3: fix flaky test (#13629)
drifkin Jan 6, 2026
76912c0
x: add experimental agent loop (#13628)
ParthSareen Jan 6, 2026
626af2d
template: fix args-as-json rendering (#13636)
drifkin Jan 7, 2026
12e2b35
x: agent loop ux improvements (#13635)
ParthSareen Jan 7, 2026
ef58116
Merge remote-tracking branch 'original/main' into rpc_clean
Gulianrdgd Jan 7, 2026
5611325
Merge remote-tracking branch 'gguf/feat/split-gguf' into rpc_clean
Gulianrdgd Jan 7, 2026
6b86bed
Testing gguf
Gulianrdgd Jan 7, 2026
33e8adc
Ready!
Jan 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ ml/backend/**/*.cu linguist-vendored
ml/backend/**/*.cuh linguist-vendored
ml/backend/**/*.m linguist-vendored
ml/backend/**/*.metal linguist-vendored
ml/backend/**/*.comp linguist-vendored
ml/backend/**/*.glsl linguist-vendored
ml/backend/**/CMakeLists.txt linguist-vendored

app/webview linguist-vendored

llama/build-info.cpp linguist-generated
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

Expand Down
18 changes: 15 additions & 3 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ jobs:
outputs:
GOFLAGS: ${{ steps.goflags.outputs.GOFLAGS }}
VERSION: ${{ steps.goflags.outputs.VERSION }}
vendorsha: ${{ steps.changes.outputs.vendorsha }}
steps:
- uses: actions/checkout@v4
- name: Set environment
id: goflags
run: |
echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_OUTPUT
echo VERSION="${GITHUB_REF_NAME#v}" >>$GITHUB_OUTPUT
echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" | tee -a $GITHUB_OUTPUT
echo VERSION="${GITHUB_REF_NAME#v}" | tee -a $GITHUB_OUTPUT
echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT

darwin-build:
runs-on: macos-14-xlarge
Expand Down Expand Up @@ -53,6 +55,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- run: |
./scripts/build_darwin.sh
- name: Log build results
Expand Down Expand Up @@ -185,7 +190,7 @@ jobs:
- uses: actions/cache@v4
with:
path: ${{ github.workspace }}\.ccache
key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}
key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}-${{ needs.setup-environment.outputs.vendorsha }}
- name: Build target "${{ matrix.preset }}"
run: |
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Expand Down Expand Up @@ -249,6 +254,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- name: Verify gcc is actually clang
run: |
$ErrorActionPreference='Continue'
Expand Down Expand Up @@ -302,6 +310,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- uses: actions/download-artifact@v4
with:
pattern: depends-windows*
Expand Down Expand Up @@ -366,6 +377,7 @@ jobs:
bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/*.so*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/cuda_v*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/vulkan*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
Expand Down
18 changes: 10 additions & 8 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
runs-on: ubuntu-latest
outputs:
changed: ${{ steps.changes.outputs.changed }}
vendorsha: ${{ steps.changes.outputs.vendorsha }}
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -37,6 +38,7 @@ jobs:
}

echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT
echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT

linux:
needs: [changes]
Expand Down Expand Up @@ -83,7 +85,7 @@ jobs:
- uses: actions/cache@v4
with:
path: /github/home/.cache/ccache
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
- run: |
cmake --preset ${{ matrix.preset }} ${{ matrix.flags }}
cmake --build --preset ${{ matrix.preset }} --parallel
Expand Down Expand Up @@ -178,7 +180,7 @@ jobs:
- uses: actions/cache@v4
with:
path: ${{ github.workspace }}\.ccache
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
- run: |
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
Expand Down Expand Up @@ -206,6 +208,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
cache-dependency-path: |
go.sum
Makefile.sync
- uses: actions/setup-node@v4
with:
node-version: '20'
Expand All @@ -226,12 +231,9 @@ jobs:
if: always()
run: go test -count=1 -benchtime=1x ./...

# TODO(bmizerany): replace this heavy tool with just the
# tools/checks/binaries we want and then make them all run in parallel
# across jobs, not on a single tiny vm on Github Actions.
- uses: golangci/golangci-lint-action@v6
- uses: golangci/golangci-lint-action@v9
with:
args: --timeout 10m0s -v
only-new-issues: true

patches:
runs-on: ubuntu-latest
Expand All @@ -240,4 +242,4 @@ jobs:
- name: Verify patches apply cleanly and do not change files
run: |
make -f Makefile.sync clean checkout apply-patches sync
git diff --compact-summary --exit-code
git diff --compact-summary --exit-code
40 changes: 25 additions & 15 deletions .golangci.yaml
Original file line number Diff line number Diff line change
@@ -1,41 +1,51 @@
run:
timeout: 5m
version: "2"
linters:
enable:
- asasalint
- bidichk
- bodyclose
- containedctx
- gocheckcompilerdirectives
- gofmt
- gofumpt
- gosimple
- govet
- ineffassign
- intrange
- makezero
- misspell
- nilerr
- nolintlint
- nosprintfhostport
- staticcheck
- unconvert
- usetesting
- wastedassign
- whitespace
disable:
- usestdlibvars
- errcheck
linters-settings:
staticcheck:
checks:
- all
- -SA1019 # omit Deprecated check
- usestdlibvars
settings:
govet:
disable:
- unusedresult
staticcheck:
checks:
- all
- -QF* # disable quick fix suggestions
- -SA1019
- -ST1000 # package comment format
- -ST1003 # underscores in package names
- -ST1005 # error strings should not be capitalized
- -ST1012 # error var naming (ErrFoo)
- -ST1016 # receiver name consistency
- -ST1020 # comment on exported function format
- -ST1021 # comment on exported type format
- -ST1022 # comment on exported var format
- -ST1023 # omit type from declaration
severity:
default-severity: error
default: error
rules:
- linters:
- gofmt
- goimports
- intrange
severity: info
formatters:
enable:
- gofmt
- gofumpt
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cp

add_compile_definitions(NDEBUG GGML_VERSION=0x0 GGML_COMMIT=0x0)

# Define GGML version variables for shared library SOVERSION
# These are required by ggml/src/CMakeLists.txt for proper library versioning
set(GGML_VERSION_MAJOR 0)
set(GGML_VERSION_MINOR 0)
set(GGML_VERSION_PATCH 0)
set(GGML_VERSION "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")

set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)
Expand Down
5 changes: 2 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ See the [development documentation](./docs/development.md) for instructions on h

* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
* Documentation: small updates to fill in or correct missing documentation is helpful, however large documentation additions can be hard to maintain over time.
* Documentation: small updates to fill in or correct missing documentation are helpful, however large documentation additions can be hard to maintain over time.

### Issues that may not be accepted

Expand All @@ -43,7 +43,7 @@ Tips for proposals:
* Explain how the change will be tested.

Additionally, for bonus points: Provide draft documentation you would expect to
see if the change were accepted.
see if the changes were accepted.

## Pull requests

Expand All @@ -66,7 +66,6 @@ Examples:

llm/backend/mlx: support the llama architecture
CONTRIBUTING: provide clarity on good commit messages, and bad
docs: simplify manual installation with shorter curl commands

Bad Examples:

Expand Down
18 changes: 14 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ARG FLAVOR=${TARGETARCH}
ARG PARALLEL=8

ARG ROCMVERSION=6.3.3
ARG ROCM7VERSION=7.0.2
ARG ROCM7VERSION=7.1.1
ARG JETPACK5VERSION=r35.4.1
ARG JETPACK6VERSION=r36.4.0
ARG CMAKEVERSION=3.31.2
Expand Down Expand Up @@ -42,8 +42,6 @@ ENV CC=clang CXX=clang++
FROM base-${TARGETARCH} AS base
ARG CMAKEVERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
ENV LDFLAGS=-s

FROM base AS cpu
Expand All @@ -52,6 +50,8 @@ RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ \
&& rm -rf /var/cache/dnf/* /var/lib/dnf/*.sqlite* /var/lib/dnf/history.* /tmp/*
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CPU' \
&& cmake --build --parallel ${PARALLEL} --preset 'CPU' \
Expand All @@ -62,6 +62,8 @@ ARG CUDA11VERSION=11.8
RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
ENV PATH=/usr/local/cuda-11/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 11' -DOLLAMA_RUNNER_DIR="cuda_v11" \
&& cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
Expand All @@ -72,6 +74,8 @@ ARG CUDA12VERSION=12.8
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
ENV PATH=/usr/local/cuda-12/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 12' -DOLLAMA_RUNNER_DIR="cuda_v12"\
&& cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
Expand All @@ -83,6 +87,8 @@ ARG CUDA13VERSION=13.0
RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
ENV PATH=/usr/local/cuda-13/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 13' -DOLLAMA_RUNNER_DIR="cuda_v13" \
&& cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
Expand All @@ -92,6 +98,8 @@ RUN --mount=type=cache,target=/root/.ccache \
FROM base AS rocm-6
ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'ROCm 6' -DOLLAMA_RUNNER_DIR="rocm" \
&& cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
Expand Down Expand Up @@ -155,6 +163,8 @@ RUN --mount=type=cache,target=/root/.ccache \
&& cmake --install build --component CUDA --strip --parallel ${PARALLEL}

FROM base AS vulkan
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'Vulkan' -DOLLAMA_RUNNER_DIR="vulkan" \
&& cmake --build --parallel --preset 'Vulkan' \
Expand Down Expand Up @@ -240,4 +250,4 @@ ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama

EXPOSE 11434
ENTRYPOINT ["/usr/bin/ollama"]
CMD ["serve"]
CMD ["serve"]
10 changes: 7 additions & 3 deletions Makefile.sync
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
UPSTREAM=https://github.com/ggml-org/llama.cpp.git
WORKDIR=llama/vendor
FETCH_HEAD=3cfa9c3f125763305b4226bc032f1954f08990dc
FETCH_HEAD=ec98e2002

.PHONY: help
help:
Expand Down Expand Up @@ -57,7 +57,7 @@ checkout: $(WORKDIR)
$(WORKDIR):
git clone $(UPSTREAM) $(WORKDIR)

.PHONE: format-patches
.PHONY: format-patches
format-patches: llama/patches
git -C $(WORKDIR) format-patch \
--no-signature \
Expand All @@ -66,7 +66,11 @@ format-patches: llama/patches
-o $(realpath $<) \
$(FETCH_HEAD)

.PHONE: clean
.PHONY: clean
clean: checkout
@git -C $(WORKDIR) am --abort || true
$(RM) llama/patches/.*.patched

.PHONY: print-base
print-base:
@echo $(FETCH_HEAD)
Loading
Loading