-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsetup.py
More file actions
183 lines (153 loc) · 6.53 KB
/
setup.py
File metadata and controls
183 lines (153 loc) · 6.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
"""
XERV CRAYON SETUP v5.3.6 - WITH C++ EXTENSIONS
==============================================
Builds native extensions for maximum performance on CPU (AVX2), CUDA, and ROCm
"""
import os
import sys
import platform
import shutil
import sysconfig
import subprocess
from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext
# Release version of the package; passed to setup() as ``version``.
VERSION = "5.3.6"
class CustomBuildExt(build_ext):
    """Build native extensions, driving ``nvcc`` directly for CUDA targets.

    Extensions whose name ends in ``_cuda`` are compiled and linked with
    ``nvcc``; every other extension goes through the stock setuptools build.
    A failure while building any single extension is reported as a warning
    instead of aborting, so the remaining build steps still run.
    """

    # Compute capabilities the CUDA sources are compiled for (one -gencode
    # entry per item). Subclasses may override this tuple.
    CUDA_ARCHS = ('70', '75', '80', '86', '89', '90')

    def build_extension(self, ext):
        """Build ``ext``, downgrading any build failure to a printed warning."""
        try:
            # Special handling for CUDA extensions
            if ext.name.endswith('_cuda'):
                self._build_cuda_extension(ext)
            else:
                super().build_extension(ext)
        except Exception as e:
            # Deliberate best-effort: a missing or broken compiler skips this
            # extension rather than failing the whole build.
            print(f"Warning: Failed to build {ext.name}: {e}")
        else:
            print(f"Successfully built: {ext.name}")

    def _build_cuda_extension(self, ext):
        """Compile every source of ``ext`` with ``nvcc`` and link the module.

        The shared library is written directly to the path returned by
        ``get_ext_fullpath`` so no intermediate copy is left in the package
        directory.

        Raises:
            RuntimeError: if ``nvcc`` cannot be located or ``ext`` lists no
                sources.
            subprocess.CalledProcessError: if a compile or link step fails.
        """
        cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
        nvcc = shutil.which('nvcc') or (
            os.path.join(cuda_home, 'bin', 'nvcc') if cuda_home else None
        )
        if not nvcc or not os.path.exists(nvcc):
            raise RuntimeError("NVCC not found")
        if not ext.sources:
            raise RuntimeError(f"No sources listed for {ext.name}")

        # The CUDA headers and libraries are looked up relative to the
        # directory that holds the nvcc binary (<dir>/../include, <dir>/../lib64).
        cuda_root = os.path.join(os.path.dirname(nvcc), '..')
        cuda_lib_dir = os.path.join(cuda_root, 'lib64')

        # Per-extension scratch directory for object files
        build_temp = os.path.join(self.build_temp, ext.name)
        os.makedirs(build_temp, exist_ok=True)

        # Final location of the importable module
        dest_file = self.get_ext_fullpath(ext.name)
        os.makedirs(os.path.dirname(dest_file), exist_ok=True)

        # Include directories: the extension's own, Python headers, CUDA headers
        candidate_dirs = [
            *(ext.include_dirs or []),
            sysconfig.get_paths()['include'],
            os.path.join(cuda_root, 'include'),
        ]
        include_flags = [f'-I{d}' for d in candidate_dirs if os.path.exists(d)]

        # CUDA architecture flags (compile for common GPUs)
        arch_flags = [
            f'-gencode=arch=compute_{arch},code=sm_{arch}'
            for arch in self.CUDA_ARCHS
        ]

        # Compile each CUDA source to its own object file. The index prefix
        # keeps object names unique when two sources share a basename.
        objects = []
        for index, src in enumerate(ext.sources):
            stem = os.path.splitext(os.path.basename(src))[0]
            obj_file = os.path.join(build_temp, f'{index}_{stem}.o')
            compile_cmd = [
                nvcc, '-c', src, '-o', obj_file,
                *include_flags,
                '-O3', '--compiler-options', '-fPIC', '-std=c++17',
                *arch_flags,
            ]
            print(f"Compiling CUDA extension: {' '.join(compile_cmd)}")
            # Argument list (no shell) so paths need no quoting or escaping.
            subprocess.check_call(compile_cmd)
            objects.append(obj_file)

        # Link into shared library
        link_cmd = [
            nvcc, '-shared', *objects, '-o', dest_file,
            f'-L{cuda_lib_dir}', '-lcudart',
        ]
        print(f"Linking CUDA extension: {' '.join(link_cmd)}")
        subprocess.check_call(link_cmd)
def get_extensions():
    """Collect the native extensions that can be built in this checkout.

    The CPU extension is built from ``cpu_engine.cpp`` when present, otherwise
    from the C fallback pair ``crayon_module.c`` / ``simd_ops.c``. The CUDA
    extension is added only on non-Windows platforms when ``nvcc`` and the
    ``.cu`` source exist and ``CRAYON_SKIP_CUDA`` is unset or empty.

    Returns:
        list: ``Extension`` objects to hand to ``setup(ext_modules=...)``;
        empty when no buildable sources are found.
    """
    extensions = []

    # Use relative paths from setup.py location
    c_ext_dir = os.path.join("src", "crayon", "c_ext")

    # CPU EXTENSION
    cpu_engine_path = os.path.join(c_ext_dir, "cpu_engine.cpp")
    crayon_module_path = os.path.join(c_ext_dir, "crayon_module.c")
    simd_ops_path = os.path.join(c_ext_dir, "simd_ops.c")

    if os.path.exists(cpu_engine_path):
        cpu_sources = [cpu_engine_path]
    elif os.path.exists(crayon_module_path):
        cpu_sources = [crayon_module_path, simd_ops_path]
    else:
        cpu_sources = []

    if cpu_sources:
        # A C++ standard flag is only meaningful for C++ translation units;
        # passing it with the plain-C fallback sources makes some compilers
        # (e.g. clang) reject the command line.
        has_cpp_source = any(
            src.endswith(('.cpp', '.cc', '.cxx')) for src in cpu_sources
        )

        if platform.system() == 'Windows':
            extra_args = ['/O2']
            if has_cpp_source:
                extra_args.append('/std:c++17')
            extra_args.extend(['/W3', '/wd4244', '/wd4267'])
        else:
            extra_args = ['-O3']
            if has_cpp_source:
                extra_args.append('-std=c++17')
            extra_args.extend(['-fPIC', '-Wall'])
            # GCC/Clang spelling of the AVX2 + FMA switches, x86-64 only.
            if platform.machine() in ('x86_64', 'AMD64'):
                extra_args.extend(['-mavx2', '-mfma'])

        cpu_ext = Extension(
            'crayon.c_ext.crayon_cpu',
            sources=cpu_sources,
            include_dirs=[c_ext_dir],
            extra_compile_args=extra_args,
            language='c++',
        )
        extensions.append(cpu_ext)

    # CUDA EXTENSION (Linux only - requires nvcc)
    if platform.system() != 'Windows':
        cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
        nvcc = shutil.which('nvcc') or (
            os.path.join(cuda_home, 'bin', 'nvcc') if cuda_home else None
        )
        cuda_src = os.path.join(c_ext_dir, "gpu_engine_cuda.cu")
        # Any non-empty CRAYON_SKIP_CUDA value disables the CUDA build.
        skip_cuda = bool(os.environ.get('CRAYON_SKIP_CUDA'))

        if (
            nvcc
            and os.path.exists(nvcc)
            and os.path.exists(cuda_src)
            and not skip_cuda
        ):
            cuda_ext = Extension(
                'crayon.c_ext.crayon_cuda',
                sources=[cuda_src],
                include_dirs=[c_ext_dir],
                language='c++',
            )
            extensions.append(cuda_ext)
            print(f"CUDA extension configured (NVCC: {nvcc})")

    return extensions
# ``--no-extensions`` is a project-specific switch: when present it is removed
# from sys.argv before setup() runs and no native extensions are configured.
build_extensions = '--no-extensions' not in sys.argv

extensions = []
if not build_extensions:
    sys.argv.remove('--no-extensions')
else:
    try:
        extensions = get_extensions()
    except Exception as exc:
        # Fall back to building no extensions if discovery itself fails.
        print(f"Extension setup failed: {exc}")
        extensions = []
# Read the README through a context manager so the file handle is closed
# deterministically instead of being left for the garbage collector.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="xerv-crayon",
    version=VERSION,
    author="Xerv Research Engineering Division",
    description="Omni-Backend Tokenizer - CPU (AVX2/512), CUDA (NVIDIA), ROCm (AMD)",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages("src"),
    package_dir={"": "src"},
    python_requires=">=3.8,<3.14",
    install_requires=["numpy>=1.21.0"],
    ext_modules=extensions,
    # Route extension builds through the custom build_ext subclass.
    cmdclass={'build_ext': CustomBuildExt},
    # Ship data files and the native sources alongside the Python package.
    package_data={
        "crayon": [
            "resources/dat/*.dat",
            "resources/dat/*.json",
            "resources/*.txt",
            "c_ext/*.h",
            "c_ext/*.c",
            "c_ext/*.cpp",
            "c_ext/*.cu",
            "c_ext/*.hip",
        ]
    },
    include_package_data=True,
)