diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 56a67f1edc8..b1b65938470 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -73,6 +73,7 @@ #define GGML_CUDA_CC_RDNA1 (GGML_CUDA_CC_OFFSET_AMD + 0x1010) // RX 5000 #define GGML_CUDA_CC_RDNA2 (GGML_CUDA_CC_OFFSET_AMD + 0x1030) // RX 6000, minimum for dp4a #define GGML_CUDA_CC_RDNA3 (GGML_CUDA_CC_OFFSET_AMD + 0x1100) // RX 7000, minimum for WMMA +#define GGML_CUDA_CC_RDNA3_IGPU (GGML_CUDA_CC_OFFSET_AMD + 0x1103) // Radeon 780M/760M iGPU #define GGML_CUDA_CC_RDNA3_5 (GGML_CUDA_CC_OFFSET_AMD + 0x1150) // AI 370, AI Max 395 laptops. #define GGML_CUDA_CC_RDNA4 (GGML_CUDA_CC_OFFSET_AMD + 0x1200) // RX 9000 @@ -80,6 +81,7 @@ #define GGML_CUDA_CC_IS_RDNA(cc) (cc >= GGML_CUDA_CC_RDNA1) #define GGML_CUDA_CC_IS_RDNA1(cc) (cc >= GGML_CUDA_CC_RDNA1 && cc < GGML_CUDA_CC_RDNA2) #define GGML_CUDA_CC_IS_RDNA2(cc) (cc >= GGML_CUDA_CC_RDNA2 && cc < GGML_CUDA_CC_RDNA3) +#define GGML_CUDA_CC_IS_RDNA3_IGPU(cc) (cc == GGML_CUDA_CC_RDNA3_IGPU) // gfx1103: Radeon 780M/760M iGPU #define GGML_CUDA_CC_IS_RDNA3_0(cc) (cc >= GGML_CUDA_CC_RDNA3 && cc < GGML_CUDA_CC_RDNA3_5) #define GGML_CUDA_CC_IS_RDNA3_5(cc) (cc >= GGML_CUDA_CC_RDNA3_5 && cc < GGML_CUDA_CC_RDNA4) #define GGML_CUDA_CC_IS_RDNA3(cc) (GGML_CUDA_CC_IS_RDNA3_0(cc) || GGML_CUDA_CC_IS_RDNA3_5(cc)) diff --git a/ggml/src/ggml-cuda/mmvq.cu b/ggml/src/ggml-cuda/mmvq.cu index 07b10167bc4..1ad5e48a46c 100644 --- a/ggml/src/ggml-cuda/mmvq.cu +++ b/ggml/src/ggml-cuda/mmvq.cu @@ -72,7 +72,7 @@ static constexpr __device__ mmvq_parameter_table_id get_device_table_id() { return MMVQ_PARAMETERS_RDNA4; #elif defined(RDNA3_0) return MMVQ_PARAMETERS_RDNA3_0; -#elif defined(RDNA2) || defined(RDNA3_5) +#elif defined(RDNA2) || defined(RDNA3_5) || defined(RDNA3_IGPU) return MMVQ_PARAMETERS_RDNA2; #elif defined(GCN) || defined(CDNA) return MMVQ_PARAMETERS_GCN; @@ -85,6 +85,9 @@ static __host__ mmvq_parameter_table_id get_device_table_id(int cc) { if (GGML_CUDA_CC_IS_RDNA4(cc)) { return MMVQ_PARAMETERS_RDNA4; } + if (GGML_CUDA_CC_IS_RDNA3_IGPU(cc)) { + return MMVQ_PARAMETERS_RDNA2; // iGPU (gfx1103) regresses with nwarps=8, use nwarps=1 + } if (GGML_CUDA_CC_IS_RDNA3_0(cc)) { return MMVQ_PARAMETERS_RDNA3_0; } diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 898fec31e36..e6a52800ef1 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -221,9 +221,13 @@ #define RDNA3_5 #endif // defined(__gfx1150__) || defined(__gfx1151__) -#if defined(RDNA3) && !defined(RDNA3_5) +#if defined(__gfx1103__) +#define RDNA3_IGPU +#endif // defined(__gfx1103__) + +#if defined(RDNA3) && !defined(RDNA3_5) && !defined(RDNA3_IGPU) #define RDNA3_0 -#endif // defined(RDNA3) && !defined(RDNA3_5) +#endif // defined(RDNA3) && !defined(RDNA3_5) && !defined(RDNA3_IGPU) #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || defined(__gfx1033__) || \ defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) || defined(__gfx1037__)