Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
147 commits
Select commit Hold shift + click to select a range
ce29f69
docs: add logprobs to openapi (#13090)
ParthSareen Nov 14, 2025
72ff5b9
log: warn if user overrides detected (#13088)
dhiltgen Nov 14, 2025
d7fd721
tests: basic benchmarking test framework (#12964)
pdevine Nov 16, 2025
5d31242
discover: fix typos in runner.go (#13096)
lnicola Nov 16, 2025
a751bc1
llama: test case typo and readability improvements (#13078)
vignesh1507 Nov 16, 2025
4cea757
server: clean up manifest documentation (#12995)
pierwill Nov 16, 2025
d564982
readme: add Kdeps to community integrations (#11877)
jjuliano Nov 16, 2025
dd0ed0e
docs: fix typos in repository documentation (#10683)
omahs Nov 16, 2025
aa676b3
docs: link to ollama.com instead of hardcoding list of cloud models (…
jmorganca Nov 17, 2025
231cc87
app/ui: fix to point ollama client to ui backend in dev mode (#13079)
hoyyeva Nov 17, 2025
399eacf
ci: fix missing vulkan binaries in linux bundles (#13123)
dhiltgen Nov 17, 2025
2f36d76
bring back sysfs based VRAM information for AMD (#12871)
dhiltgen Nov 17, 2025
4aba2e8
discover: Support cgroups cores and memory limitations (#10292)
SiLeader Nov 18, 2025
1fd4cb8
app/cmd: restrict ollama:// URL scheme to supported paths (#13120)
hoyyeva Nov 18, 2025
584e2d6
Add deepseek v3.1 (#13063)
gr4ceG Nov 18, 2025
330f62a
docs: add Void Editor to community integrations (#13124)
ssam18 Nov 18, 2025
718961d
migrate to golangci-lint v2 (#13109)
mxyng Nov 18, 2025
440a382
fix(tokenizer): add special tokens to empty inputs (#13091)
mxyng Nov 18, 2025
8ed1adf
docs: fix typo in vscode.mdx (#13116)
seolyam Nov 18, 2025
92981ae
deepseekocr
mxyng Nov 1, 2025
0796d79
cuda: skip large batches
mxyng Nov 18, 2025
485da9f
win: exit instead of abort (#13138)
dhiltgen Nov 19, 2025
8de30b5
nomic-embed-text model implementation (#13071)
npardal Nov 19, 2025
9193563
Renderer for Cogito v2 (#13139)
gr4ceG Nov 19, 2025
b6e02cb
ggml: Automatically make tensors contiguous on reshape
jessegross Nov 19, 2025
53985b3
kvcache: Use SetRows to store cache data
jessegross Aug 18, 2025
604e43b
models: enable deepseek2 (deepseek v3.1 w/ MLA) on the new engine (#1…
pdevine Nov 19, 2025
eac5b8b
chore: mark vulkan shaders as vendored files
mxyng Nov 19, 2025
b2af509
nomic-embed: nomic-embed-text defaulted to ollama runner (#13144)
npardal Nov 19, 2025
cb485b2
kvcache: Run tests both with and without PermutedV
jessegross Nov 19, 2025
5c1063d
deepseek2: upgrade to run v3+ models (#13166)
mxyng Nov 20, 2025
d70e935
Parser for Cogito v2 (#13145)
gr4ceG Nov 20, 2025
dba62ff
discovery: fix cuda overlap case (#13176)
dhiltgen Nov 20, 2025
417a81f
app: open app instead of always navigating to / on connect (#13164)
jmorganca Nov 20, 2025
47e272c
app/cmd: update ollama help to navigate to ollama doc instead of gith…
hoyyeva Nov 20, 2025
efdd9b7
gguf: add split gguf loading
cvrunmin Nov 20, 2025
8b1b89a
docs: remove deprecated parameters (#13237)
EntropyYue Nov 26, 2025
fb7c898
ggml: fix cannot read split info
cvrunmin Nov 26, 2025
5f32d76
server: sort baselayers by split.no in create route
cvrunmin Nov 26, 2025
a12cabb
ggml: fix wrong param size of metaggml
cvrunmin Nov 27, 2025
b2ebfcc
server: get correct info from split ggufs while creating models
cvrunmin Nov 27, 2025
10dc89f
server: sanity check when creating model with split gguf
cvrunmin Nov 28, 2025
3e353c9
docs: add infos for split gguf
cvrunmin Nov 28, 2025
fa9f1ee
Merge branch 'main' into feat/split-gguf
cvrunmin Nov 28, 2025
469ac5b
server: more sanity check when loading split gguf
cvrunmin Nov 28, 2025
0c24896
docs: fix output formatting in faq.mdx (#13231)
kokes Nov 29, 2025
6d9f932
.gitattributes: add app/webview to linguist-vendored (#13274)
jmorganca Nov 30, 2025
467bbc0
jetpack: require exact match or skip cuda_jetpack* (#13288)
dhiltgen Dec 1, 2025
5b6a8e6
api/client: handle non-json streaming errors (#13007)
BruceMacD Dec 1, 2025
5541727
win: warn if ggml-base detected in PATH (#13289)
dhiltgen Dec 1, 2025
d3e0a0d
model: ministral w/ llama4 scaling (#13292)
pdevine Dec 2, 2025
f8f1071
CUDA: verify CC is supported by target library (#13298)
dhiltgen Dec 2, 2025
d771043
test: add ministral-3 (#13300)
dhiltgen Dec 2, 2025
5317202
llm: Don't always evict models on CPU-only systems
jessegross Nov 25, 2025
18b5958
test: avoid ministral tools test on low vram (#13302)
dhiltgen Dec 2, 2025
20aee96
Add Vulkan GPU support instructions in development.md (#13265)
chengcheng84 Dec 2, 2025
cc9555a
Update user message format for temperature query (#13256)
nathan-hook Dec 2, 2025
5554b60
Merge remote-tracking branch 'original/main' into rpc_clean
Gulianrdgd Dec 3, 2025
355d5ba
Updated and fixed again
Gulianrdgd Dec 3, 2025
3f30836
CUDA: filter devices on secondary discovery (#13317)
dhiltgen Dec 3, 2025
84a2ced
app: relay thinking false to server (#13319)
BruceMacD Dec 3, 2025
854d40e
ci: restore previous linter rules (#13322)
jmorganca Dec 4, 2025
0cf7794
ggml update to b7108 (#12992)
dhiltgen Dec 4, 2025
a03223b
cmd/bench: support writing benchmark output to file (#13263)
Eloitor Dec 4, 2025
0a844f8
convert: add deepseek converter (#12980)
pdevine Dec 4, 2025
7837a5b
ggml: Always set cache padding to 256
jessegross Dec 4, 2025
1108d8b
ggml: Enable flash attention for vision encoders
jessegross Dec 2, 2025
9191dfa
llm: Enable flash attention for mistral3 by default
jessegross Dec 4, 2025
31b8c6a
fix(api): correct Content-Type header for /api/chat and /api/generate…
ZeeeUs Dec 5, 2025
c146a13
ggml: handle all streams (#13350)
dhiltgen Dec 6, 2025
5a41d69
fs/ggml: write int32 and int64 values to gguf files (#13335)
jmorganca Dec 8, 2025
0c78723
readme: fix broken Swollama link in community integrations (#13370)
computerscienceiscool Dec 8, 2025
44e6a78
Merge branch 'main' into feat/split-gguf
cvrunmin Dec 8, 2025
5dae738
CI: use vendor base commit in cache keys (#13348)
dhiltgen Dec 8, 2025
e082d60
truncation: fixed runner truncation logic + removed server truncation…
npardal Dec 8, 2025
603ceef
refactor rope
mxyng Nov 18, 2025
d2f334c
model: add rnj-1 inference support (#13354)
jmorganca Dec 9, 2025
d475d1f
fix: qwen2.5vl metal argsort
mxyng Dec 8, 2025
0c5e5f6
parsers/renderers: olmo3 think (#13290)
ParthSareen Dec 9, 2025
2bccf8c
renderers/parsers: olmo3 instruct (#13383)
ParthSareen Dec 9, 2025
76f88ca
nomic-embed-text:v2: model implementation (#13162)
npardal Dec 9, 2025
bbbb6b2
app/ui: fix model capabilities not updating after download completion…
hoyyeva Dec 10, 2025
7cf6f18
app/ui: refactor to use Ollama endpoints for user auth and health che…
hoyyeva Dec 10, 2025
c34fc64
app/ui: use requestAnimationFrame to prevent bottom line cutoff in st…
hoyyeva Dec 10, 2025
b956930
feat: llama.cpp bump (17f7f4) for SSM performance improvements (#13408)
gabe-l-hart Dec 10, 2025
56b8fb0
cmd/bench: fix options table in cmd/bench/README.md (#13216)
pythongirl325 Dec 10, 2025
dac4f17
cmd/bench: fix binary name in README (#13276)
Eloitor Dec 10, 2025
1c4e85b
routes: add logprobs in tool calls (#13238)
Eason023 Dec 11, 2025
a838421
model: conversion and hyperparameter fixes for ministral and devstral…
jmorganca Dec 11, 2025
48e78e9
template: add yesterdayDate helper function (#13431)
jmorganca Dec 11, 2025
3475d91
embeddings: modified batch size (#13429)
npardal Dec 11, 2025
1eb5e75
openai: add v1/responses support (#13351)
drifkin Dec 11, 2025
2dfb744
model: fix rotary embeddings for ministral 3 (#13432)
jmorganca Dec 12, 2025
709f842
Update README.md (#13373)
computerscienceiscool Dec 12, 2025
93d45d7
docs: fix link to modelfile.mdx (#13220)
familom Dec 12, 2025
9b2035d
openai: add tool call appending to previous assistant message (#13434)
ParthSareen Dec 12, 2025
9f78228
docs: add docs for v1/responses and rework openai compat section (#1…
drifkin Dec 12, 2025
95fdd8d
fix: select and update models folder in settings (#13412)
hoyyeva Dec 12, 2025
de9ecfd
tidy up lint warnings on windows (#13430)
hoyyeva Dec 12, 2025
7730895
Enable Ollama engine by default (#13443)
dhiltgen Dec 12, 2025
3af5d3b
model: force rope factor 1.0 for Gemma 3 (#13445)
jmorganca Dec 12, 2025
bd6c1d6
flash attn: add auto mode for llama engine (#13052)
dhiltgen Dec 12, 2025
1b308e1
model: fix global layer rope scale values for gemma 3 (#13452)
jmorganca Dec 13, 2025
4ff8a69
model: default gemma 3 rope scale to 1.0, apply corrections based on …
jmorganca Dec 13, 2025
c81b9ec
Merge branch 'main' into feat/split-gguf
cvrunmin Dec 15, 2025
abe67ac
Revert "Enable Ollama engine by default" (#13481)
dhiltgen Dec 15, 2025
8dbc9e7
app/ui: handle unspecified bind addresses and wait for server in olla…
hoyyeva Dec 15, 2025
e3731fb
renderers: add olmo3.1 and olmo3 fixes (#13447)
ParthSareen Dec 15, 2025
aacd1cb
fix: define GGML_VERSION variables for proper SOVERSION expansion (#1…
nathannewyen Dec 15, 2025
2c63943
DeepseekV3 family renderer (#13180)
gr4ceG Dec 15, 2025
ffbe8e0
model: add olmo3 and olmo3.1 (#13415)
ParthSareen Dec 15, 2025
971d625
fix: qwen2.5 vl rope (#13486)
mxyng Dec 16, 2025
7b95087
Adding tool definitions to DeepseekV3 renderer (#13491)
gr4ceG Dec 16, 2025
7e3ea81
llama/parsers/renderers: nemotron 3 nano (#13489)
ParthSareen Dec 16, 2025
89eb795
parsers/renderers: use think from user for nemotron (#13492)
ParthSareen Dec 16, 2025
903b1fc
use ollama engine for bert models (#13501)
mxyng Dec 16, 2025
2dd029d
remove unnecessary code (#13502)
mxyng Dec 16, 2025
45c4739
types: ConfigV2 and RootFS (#13504)
BruceMacD Dec 16, 2025
f6a016f
revert granite-embedding (#13505)
mxyng Dec 16, 2025
a013693
DeepseekV3 Family Parser (#13484)
gr4ceG Dec 17, 2025
1c09403
types: add nested property support for tool definitions (#13508)
ParthSareen Dec 17, 2025
49a9c9b
GGML update to ec98e2002 (#13451)
dhiltgen Dec 17, 2025
0fadeff
Omit args and params in tool function def and calls (#13516)
gr4ceG Dec 18, 2025
522c11a
Revert "Omit args and params in tool function def and calls (#13516)"…
gr4ceG Dec 18, 2025
7325791
parsers/renderers: functiongemma (#13521)
ParthSareen Dec 18, 2025
8852220
add REQUIRES command to Modelfile (#13361)
jmorganca Dec 18, 2025
3286751
Merge branch 'main' into feat/split-gguf
cvrunmin Dec 19, 2025
2a0ce29
Update for ROCm
Dec 19, 2025
9c185fb
Merge branch 'rpc_clean' of https://github.com/NOLAI/ollama into rpc_…
Dec 19, 2025
95d45a4
Merge remote-tracking branch 'original/main' into rpc_clean
Dec 19, 2025
172b592
llm: Avoid integer underflow on llama engine memory layout
jessegross Dec 19, 2025
7ad0369
amd: use GTT on iGPUs on linux (#13196)
dhiltgen Dec 23, 2025
18fdcc9
docs: fix broken .md links and render issues (#13550)
Vallabh-1504 Dec 23, 2025
f5f74e1
docs: add version note for /v1/responses API (#13596)
majiayu000 Jan 3, 2026
2e78653
app/ui: add swift syntax highlighting support (#13574)
majiayu000 Jan 3, 2026
e1bdc23
docs: fix tool name mismatch and trailing commas in api.md example (#…
nathannewyen Jan 3, 2026
37f6f3a
server: return error when embedding contains NaN or Inf values (#13599)
majiayu000 Jan 3, 2026
d087e46
docs/capabilities/vision: fix curl related code snippet (#13615)
harrykiselev Jan 3, 2026
e51dead
preserve tool definition and call JSON ordering (#13525)
drifkin Jan 6, 2026
6c3faaf
olmo3: fix flaky test (#13629)
drifkin Jan 6, 2026
76912c0
x: add experimental agent loop (#13628)
ParthSareen Jan 6, 2026
626af2d
template: fix args-as-json rendering (#13636)
drifkin Jan 7, 2026
12e2b35
x: agent loop ux improvements (#13635)
ParthSareen Jan 7, 2026
ef58116
Merge remote-tracking branch 'original/main' into rpc_clean
Gulianrdgd Jan 7, 2026
5611325
Merge remote-tracking branch 'gguf/feat/split-gguf' into rpc_clean
Gulianrdgd Jan 7, 2026
6b86bed
Testing gguf
Gulianrdgd Jan 7, 2026
33e8adc
Ready!
Jan 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ ml/backend/**/*.cu linguist-vendored
ml/backend/**/*.cuh linguist-vendored
ml/backend/**/*.m linguist-vendored
ml/backend/**/*.metal linguist-vendored
ml/backend/**/*.comp linguist-vendored
ml/backend/**/*.glsl linguist-vendored
ml/backend/**/CMakeLists.txt linguist-vendored

app/webview linguist-vendored

llama/build-info.cpp linguist-generated
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

Expand Down
18 changes: 15 additions & 3 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ jobs:
outputs:
GOFLAGS: ${{ steps.goflags.outputs.GOFLAGS }}
VERSION: ${{ steps.goflags.outputs.VERSION }}
vendorsha: ${{ steps.changes.outputs.vendorsha }}
steps:
- uses: actions/checkout@v4
- name: Set environment
id: goflags
run: |
echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_OUTPUT
echo VERSION="${GITHUB_REF_NAME#v}" >>$GITHUB_OUTPUT
echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" | tee -a $GITHUB_OUTPUT
echo VERSION="${GITHUB_REF_NAME#v}" | tee -a $GITHUB_OUTPUT
echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT

darwin-build:
runs-on: macos-14-xlarge
Expand Down Expand Up @@ -53,6 +55,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- run: |
./scripts/build_darwin.sh
- name: Log build results
Expand Down Expand Up @@ -185,7 +190,7 @@ jobs:
- uses: actions/cache@v4
with:
path: ${{ github.workspace }}\.ccache
key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}
key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}-${{ needs.setup-environment.outputs.vendorsha }}
- name: Build target "${{ matrix.preset }}"
run: |
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Expand Down Expand Up @@ -249,6 +254,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- name: Verify gcc is actually clang
run: |
$ErrorActionPreference='Continue'
Expand Down Expand Up @@ -302,6 +310,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- uses: actions/download-artifact@v4
with:
pattern: depends-windows*
Expand Down Expand Up @@ -366,6 +377,7 @@ jobs:
bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/*.so*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/cuda_v*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/vulkan*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
Expand Down
18 changes: 10 additions & 8 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
runs-on: ubuntu-latest
outputs:
changed: ${{ steps.changes.outputs.changed }}
vendorsha: ${{ steps.changes.outputs.vendorsha }}
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -37,6 +38,7 @@ jobs:
}

echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT
echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT

linux:
needs: [changes]
Expand Down Expand Up @@ -83,7 +85,7 @@ jobs:
- uses: actions/cache@v4
with:
path: /github/home/.cache/ccache
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
- run: |
cmake --preset ${{ matrix.preset }} ${{ matrix.flags }}
cmake --build --preset ${{ matrix.preset }} --parallel
Expand Down Expand Up @@ -178,7 +180,7 @@ jobs:
- uses: actions/cache@v4
with:
path: ${{ github.workspace }}\.ccache
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
- run: |
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
Expand Down Expand Up @@ -206,6 +208,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
cache-dependency-path: |
go.sum
Makefile.sync
- uses: actions/setup-node@v4
with:
node-version: '20'
Expand All @@ -226,12 +231,9 @@ jobs:
if: always()
run: go test -count=1 -benchtime=1x ./...

# TODO(bmizerany): replace this heavy tool with just the
# tools/checks/binaries we want and then make them all run in parallel
# across jobs, not on a single tiny vm on Github Actions.
- uses: golangci/golangci-lint-action@v6
- uses: golangci/golangci-lint-action@v9
with:
args: --timeout 10m0s -v
only-new-issues: true

patches:
runs-on: ubuntu-latest
Expand All @@ -240,4 +242,4 @@ jobs:
- name: Verify patches apply cleanly and do not change files
run: |
make -f Makefile.sync clean checkout apply-patches sync
git diff --compact-summary --exit-code
git diff --compact-summary --exit-code
40 changes: 25 additions & 15 deletions .golangci.yaml
Original file line number Diff line number Diff line change
@@ -1,41 +1,51 @@
run:
timeout: 5m
version: "2"
linters:
enable:
- asasalint
- bidichk
- bodyclose
- containedctx
- gocheckcompilerdirectives
- gofmt
- gofumpt
- gosimple
- govet
- ineffassign
- intrange
- makezero
- misspell
- nilerr
- nolintlint
- nosprintfhostport
- staticcheck
- unconvert
- usetesting
- wastedassign
- whitespace
disable:
- usestdlibvars
- errcheck
linters-settings:
staticcheck:
checks:
- all
- -SA1019 # omit Deprecated check
- usestdlibvars
settings:
govet:
disable:
- unusedresult
staticcheck:
checks:
- all
- -QF* # disable quick fix suggestions
- -SA1019
- -ST1000 # package comment format
- -ST1003 # underscores in package names
- -ST1005 # error strings should not be capitalized
- -ST1012 # error var naming (ErrFoo)
- -ST1016 # receiver name consistency
- -ST1020 # comment on exported function format
- -ST1021 # comment on exported type format
- -ST1022 # comment on exported var format
- -ST1023 # omit type from declaration
severity:
default-severity: error
default: error
rules:
- linters:
- gofmt
- goimports
- intrange
severity: info
formatters:
enable:
- gofmt
- gofumpt
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cp

add_compile_definitions(NDEBUG GGML_VERSION=0x0 GGML_COMMIT=0x0)

# Define GGML version variables for shared library SOVERSION
# These are required by ggml/src/CMakeLists.txt for proper library versioning
set(GGML_VERSION_MAJOR 0)
set(GGML_VERSION_MINOR 0)
set(GGML_VERSION_PATCH 0)
set(GGML_VERSION "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")

set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)
Expand Down
5 changes: 2 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ See the [development documentation](./docs/development.md) for instructions on h

* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
* Documentation: small updates to fill in or correct missing documentation is helpful, however large documentation additions can be hard to maintain over time.
* Documentation: small updates to fill in or correct missing documentation are helpful, however large documentation additions can be hard to maintain over time.

### Issues that may not be accepted

Expand All @@ -43,7 +43,7 @@ Tips for proposals:
* Explain how the change will be tested.

Additionally, for bonus points: Provide draft documentation you would expect to
see if the change were accepted.
see if the changes were accepted.

## Pull requests

Expand All @@ -66,7 +66,6 @@ Examples:

llm/backend/mlx: support the llama architecture
CONTRIBUTING: provide clarity on good commit messages, and bad
docs: simplify manual installation with shorter curl commands

Bad Examples:

Expand Down
18 changes: 14 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ARG FLAVOR=${TARGETARCH}
ARG PARALLEL=8

ARG ROCMVERSION=6.3.3
ARG ROCM7VERSION=7.0.2
ARG ROCM7VERSION=7.1.1
ARG JETPACK5VERSION=r35.4.1
ARG JETPACK6VERSION=r36.4.0
ARG CMAKEVERSION=3.31.2
Expand Down Expand Up @@ -42,8 +42,6 @@ ENV CC=clang CXX=clang++
FROM base-${TARGETARCH} AS base
ARG CMAKEVERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
ENV LDFLAGS=-s

FROM base AS cpu
Expand All @@ -52,6 +50,8 @@ RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ \
&& rm -rf /var/cache/dnf/* /var/lib/dnf/*.sqlite* /var/lib/dnf/history.* /tmp/*
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CPU' \
&& cmake --build --parallel ${PARALLEL} --preset 'CPU' \
Expand All @@ -62,6 +62,8 @@ ARG CUDA11VERSION=11.8
RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
ENV PATH=/usr/local/cuda-11/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 11' -DOLLAMA_RUNNER_DIR="cuda_v11" \
&& cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
Expand All @@ -72,6 +74,8 @@ ARG CUDA12VERSION=12.8
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
ENV PATH=/usr/local/cuda-12/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 12' -DOLLAMA_RUNNER_DIR="cuda_v12"\
&& cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
Expand All @@ -83,6 +87,8 @@ ARG CUDA13VERSION=13.0
RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
ENV PATH=/usr/local/cuda-13/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 13' -DOLLAMA_RUNNER_DIR="cuda_v13" \
&& cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
Expand All @@ -92,6 +98,8 @@ RUN --mount=type=cache,target=/root/.ccache \
FROM base AS rocm-6
ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
ARG PARALLEL
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'ROCm 6' -DOLLAMA_RUNNER_DIR="rocm" \
&& cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
Expand Down Expand Up @@ -155,6 +163,8 @@ RUN --mount=type=cache,target=/root/.ccache \
&& cmake --install build --component CUDA --strip --parallel ${PARALLEL}

FROM base AS vulkan
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'Vulkan' -DOLLAMA_RUNNER_DIR="vulkan" \
&& cmake --build --parallel --preset 'Vulkan' \
Expand Down Expand Up @@ -240,4 +250,4 @@ ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama

EXPOSE 11434
ENTRYPOINT ["/usr/bin/ollama"]
CMD ["serve"]
CMD ["serve"]
10 changes: 7 additions & 3 deletions Makefile.sync
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
UPSTREAM=https://github.com/ggml-org/llama.cpp.git
WORKDIR=llama/vendor
FETCH_HEAD=3cfa9c3f125763305b4226bc032f1954f08990dc
FETCH_HEAD=ec98e2002

.PHONY: help
help:
Expand Down Expand Up @@ -57,7 +57,7 @@ checkout: $(WORKDIR)
$(WORKDIR):
git clone $(UPSTREAM) $(WORKDIR)

.PHONE: format-patches
.PHONY: format-patches
format-patches: llama/patches
git -C $(WORKDIR) format-patch \
--no-signature \
Expand All @@ -66,7 +66,11 @@ format-patches: llama/patches
-o $(realpath $<) \
$(FETCH_HEAD)

.PHONE: clean
.PHONY: clean
clean: checkout
@git -C $(WORKDIR) am --abort || true
$(RM) llama/patches/.*.patched

.PHONY: print-base
print-base:
@echo $(FETCH_HEAD)
Loading
Loading