diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 11d3e8a8167..563528da4d4 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -232,6 +232,8 @@ #define GGML_DEFAULT_N_THREADS 4 #define GGML_DEFAULT_GRAPH_SIZE 2048 +#define GGML_SCHED_MAX_SPLIT_BITS 12 // log2(4096) + #if UINTPTR_MAX == 0xFFFFFFFF #define GGML_MEM_ALIGN 4 #elif defined(__EMSCRIPTEN__) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 1a555bf2a4d..b3fef7038af 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -1030,6 +1030,8 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra GGML_ABORT("%s: failed to initialize context\n", __func__); } + graph->uid = ggml_graph_next_uid(); + // pass 1: assign backends to ops with pre-allocated inputs for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; @@ -1477,6 +1479,12 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra assert(graph_copy->size > graph_copy->n_leafs); graph_copy->leafs[graph_copy->n_leafs++] = leaf; } + + // set ids for all splits + GGML_ASSERT(sched->n_splits < (1 << GGML_SCHED_MAX_SPLIT_BITS)); + for (int i = 0; i < sched->n_splits; ++i) { + sched->splits[i].graph.uid = graph->uid | ((uint64_t)(i + 1) << (64 - GGML_SCHED_MAX_SPLIT_BITS)); + } } static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 8a4246223b5..ae8f015b014 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -1183,6 +1183,7 @@ struct ggml_cuda_graph { std::vector nodes; bool disable_due_to_gpu_arch = false; bool warmup_complete = false; + uint64_t last_graph_uid = 0; struct node_properties { ggml_tensor node; void * node_src_data_ptrs[GGML_MAX_SRC]; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 3113de017f0..c1982631c89 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3060,6 +3060,15 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx const void * graph_key = ggml_cuda_graph_get_key(cgraph); ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key); + if (cgraph->uid != 0 && + cgraph->uid == graph->last_graph_uid) { + GGML_LOG_DEBUG("CUDA Graph id %zu reused\n", cgraph->uid); + GGML_ASSERT((int)graph->node_props.size() == cgraph->n_nodes); + return false; + } + + graph->last_graph_uid = cgraph->uid; + // Check if the graph size has changed if ((int)graph->node_props.size() != cgraph->n_nodes) { res = true; diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 0639db362e7..e28f8e2e06a 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -30,6 +30,8 @@ extern "C" { void ggml_print_backtrace(void); +uint64_t ggml_graph_next_uid(void); + #ifndef MIN # define MIN(a, b) ((a) < (b) ? (a) : (b)) #endif @@ -338,6 +340,8 @@ struct ggml_cgraph { struct ggml_hash_set visited_hash_set; enum ggml_cgraph_eval_order order; + + uint64_t uid; }; // returns a slice of cgraph with nodes [i0, i1) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 0142498d967..c01ac756b93 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -53,6 +53,19 @@ #define UNUSED GGML_UNUSED +uint64_t ggml_graph_next_uid(void) { +#ifdef _MSC_VER + static volatile long long counter = 1; + long long ret = (uint64_t) _InterlockedIncrement64(&counter) - 1; + GGML_ASSERT(ret < (1ULL << (64 - GGML_SCHED_MAX_SPLIT_BITS))); +#else + static uint64_t counter = 1; + uint64_t ret = __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED); + GGML_ASSERT(ret < (1ULL << (64 - GGML_SCHED_MAX_SPLIT_BITS))); +#endif + return ret; +} + // Needed for ggml_fp32_to_bf16_row() #if defined(__AVX512BF16__) #if defined(_MSC_VER) @@ -7098,6 +7111,7 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz /*.use_counts =*/ use_counts_ptr, /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr }, /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, + /*.uid =*/ 0, }; ggml_hash_set_reset(&cgraph->visited_hash_set); @@ -7125,6 +7139,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) /*.use_counts =*/ cgraph0->use_counts, /*.visited_hash_set =*/ cgraph0->visited_hash_set, /*.order =*/ cgraph0->order, + /*.uid =*/ 0 }; return cgraph;