Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@
#define GGML_DEFAULT_N_THREADS 4
#define GGML_DEFAULT_GRAPH_SIZE 2048

#define GGML_SCHED_MAX_SPLIT_BITS 12 // log2(4096)

#if UINTPTR_MAX == 0xFFFFFFFF
#define GGML_MEM_ALIGN 4
#elif defined(__EMSCRIPTEN__)
Expand Down
8 changes: 8 additions & 0 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,8 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
GGML_ABORT("%s: failed to initialize context\n", __func__);
}

graph->uid = ggml_graph_next_uid();

// pass 1: assign backends to ops with pre-allocated inputs
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
Expand Down Expand Up @@ -1477,6 +1479,12 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
assert(graph_copy->size > graph_copy->n_leafs);
graph_copy->leafs[graph_copy->n_leafs++] = leaf;
}

// set ids for all splits
GGML_ASSERT(sched->n_splits < (1 << GGML_SCHED_MAX_SPLIT_BITS));
for (int i = 0; i < sched->n_splits; ++i) {
sched->splits[i].graph.uid = graph->uid | ((uint64_t)(i + 1) << (64 - GGML_SCHED_MAX_SPLIT_BITS));
}
}

static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-cuda/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@ struct ggml_cuda_graph {
std::vector<cudaGraphNode_t> nodes;
bool disable_due_to_gpu_arch = false;
bool warmup_complete = false;
uint64_t last_graph_uid = 0;
struct node_properties {
ggml_tensor node;
void * node_src_data_ptrs[GGML_MAX_SRC];
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-cuda/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3060,6 +3060,15 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
const void * graph_key = ggml_cuda_graph_get_key(cgraph);
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);

// Fast path: a non-zero uid equal to the one recorded for this CUDA graph means the same
// graph is being submitted again, so no update is reported. A uid of 0 is the
// "no id assigned" value and always falls through to the property checks below.
if (cgraph->uid != 0 &&
    cgraph->uid == graph->last_graph_uid) {
    // uid is a uint64_t: print it via unsigned long long, since %zu is only valid for size_t
    GGML_LOG_DEBUG("CUDA Graph id %llu reused\n", (unsigned long long) cgraph->uid);
    // sanity check: the cached node properties must still match the node count of the graph
    GGML_ASSERT((int)graph->node_props.size() == cgraph->n_nodes);
    return false;
}

graph->last_graph_uid = cgraph->uid;

// Check if the graph size has changed
if ((int)graph->node_props.size() != cgraph->n_nodes) {
res = true;
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ extern "C" {

void ggml_print_backtrace(void);

uint64_t ggml_graph_next_uid(void);

#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
Expand Down Expand Up @@ -338,6 +340,8 @@ struct ggml_cgraph {
struct ggml_hash_set visited_hash_set;

enum ggml_cgraph_eval_order order;

uint64_t uid;
};

// returns a slice of cgraph with nodes [i0, i1)
Expand Down
15 changes: 15 additions & 0 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@

#define UNUSED GGML_UNUSED

// Returns the next graph uid from a function-local counter that is incremented atomically.
//
// The counter starts at 1 and the value *before* the increment is returned, so the first
// uid is 1 and 0 is never handed out. The assertion rejects any uid that would use one of
// the upper GGML_SCHED_MAX_SPLIT_BITS bits of the 64-bit value.
uint64_t ggml_graph_next_uid(void) {
#ifdef _MSC_VER
    static volatile long long counter = 1;
    // _InterlockedIncrement64 yields the incremented value; subtract 1 to get the previous one
    const uint64_t ret = (uint64_t) _InterlockedIncrement64(&counter) - 1;
#else
    static uint64_t counter = 1;
    // fetch-add yields the value held before the addition
    const uint64_t ret = __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED);
#endif
    // single shared range check for both implementations
    GGML_ASSERT(ret < (1ULL << (64 - GGML_SCHED_MAX_SPLIT_BITS)));
    return ret;
}

// Needed for ggml_fp32_to_bf16_row()
#if defined(__AVX512BF16__)
#if defined(_MSC_VER)
Expand Down Expand Up @@ -7098,6 +7111,7 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
/*.use_counts =*/ use_counts_ptr,
/*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
/*.uid =*/ 0,
};

ggml_hash_set_reset(&cgraph->visited_hash_set);
Expand Down Expand Up @@ -7125,6 +7139,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
/*.use_counts =*/ cgraph0->use_counts,
/*.visited_hash_set =*/ cgraph0->visited_hash_set,
/*.order =*/ cgraph0->order,
/*.uid =*/ 0
};

return cgraph;
Expand Down
Loading