Skip to content

Commit 84b626e

Browse files
authored
excuter-cuda:解决bf16,fp16的打印问题 (#77)
* py: 1.新增类似transformer的Config类
* py: 1.deepxutil/safetensor转换deepx
* cuda:reshape调用错误修正
* cuda:bf16 存/恢复 存在问题
* excuter-cuda:解决bf16,fp16的打印问题
1 parent 64c0046 commit 84b626e

25 files changed

Lines changed: 422 additions & 412 deletions

File tree

excuter/cpp-common/src/stdutil/fs.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,14 @@ namespace stdutil
1717

1818
void save(const byte *data, size_t size, const string &path)
1919
{
20-
2120
ofstream ofs(path, ios::binary | ios::out | ios::trunc);
21+
if (!ofs.is_open()) {
22+
throw std::runtime_error("Failed to open file for writing: " + path);
23+
}
2224
ofs.write(reinterpret_cast<const char *>(data), size);
25+
if (!ofs) {
26+
throw std::runtime_error("Failed to write data to file: " + path);
27+
}
2328
ofs.close();
2429
}
2530

excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,12 @@ namespace deepx::tf
5454
case Precision::Float32:
5555
reshape<Author, float>(*mem->gettensor<float>(this->args[0].textvalue), shape, *mem->gettensor<float>(this->returns[0].textvalue));
5656
break;
57+
case Precision::Float16:
58+
reshape<Author, half>(*mem->gettensor<half>(this->args[0].textvalue), shape, *mem->gettensor<half>(this->returns[0].textvalue));
59+
break;
60+
case Precision::BFloat16:
61+
reshape<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), shape, *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
62+
break;
5763
case Precision::Int64:
5864
reshape<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), shape, *mem->gettensor<int64_t>(this->returns[0].textvalue));
5965
break;

excuter/op-mem-cuda/src/deepx/tf/io.hpp

Lines changed: 58 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -24,24 +24,68 @@ namespace deepx::tf
2424
int run(shared_ptr<MemBase> mem, string &error) override
2525
{
2626
string name = this->args[0].textvalue;
27-
if (mem->existstensor(name))
28-
{
29-
auto t = mem->gettensor(name);
30-
if (this->args.size() == 1)
31-
{
32-
tensorfunc::print<Author, void>(*t);
33-
}
34-
else
35-
{
36-
tensorfunc::print<Author, void>(*t, this->args[1].textvalue);
37-
}
38-
}
39-
else
40-
{
27+
if (!mem->existstensor(name))
28+
{
4129
std::cerr << "print " << name << " not found" << std::endl;
4230
error = "print " + name + " not found";
4331
return 1;
4432
}
33+
string format="";
34+
if (this->args.size() > 1){
35+
format = this->args[1].textvalue;
36+
}
37+
38+
Precision dtype = mem->gettensor(name)->shape.dtype;
39+
switch (dtype)
40+
{
41+
case Precision::Float64:{
42+
auto t = mem->gettensor<double>(name);
43+
tensorfunc::print<Author,double>(*t,format);
44+
break;
45+
}
46+
case Precision::Float32:{
47+
auto t = mem->gettensor<float>(name);
48+
tensorfunc::print<Author>(*t,format);
49+
break;
50+
}
51+
case Precision::Float16:{
52+
auto t = mem->gettensor<half>(name);
53+
tensorfunc::print<Author>(*t,format);
54+
break;
55+
}
56+
case Precision::BFloat16:{
57+
auto t = mem->gettensor<nv_bfloat16>(name);
58+
tensorfunc::print<Author>(*t,format);
59+
break;
60+
}
61+
case Precision::Int64:{
62+
auto t = mem->gettensor<int64_t>(name);
63+
tensorfunc::print<Author>(*t,format);
64+
break;
65+
}
66+
case Precision::Int32:{
67+
auto t = mem->gettensor<int32_t>(name);
68+
tensorfunc::print<Author>(*t,format);
69+
break;
70+
}
71+
case Precision::Int16:{
72+
auto t = mem->gettensor<int16_t>(name);
73+
tensorfunc::print<Author>(*t,format);
74+
break;
75+
}
76+
case Precision::Int8:{
77+
auto t = mem->gettensor<int8_t>(name);
78+
tensorfunc::print<Author>(*t,format);
79+
break;
80+
}
81+
case Precision::Bool:{
82+
auto t = mem->gettensor<bool>(name);
83+
tensorfunc::print<Author,bool>(*t,format);
84+
break;
85+
}
86+
default:
87+
break;
88+
}
4589
return 0;
4690
}
4791

front/py/deepx/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
from .tensor import Tensor,Shape,Number
22
from deepx.nn.functional import * # 导入所有functional函数
33
from deepx.nn.functional import __all__ as _func_all # 获取functional的导出列表
4-
4+
from deepx.utils import __all__ as _utils_all # 获取utils的导出列表
55
__all__ = [
66
#tensor
77
'Tensor','Shape','Number',
8-
*_func_all
8+
*_func_all,
9+
*_utils_all,
910
]
1011

1112
# 为了支持 import deepx as dx 的用法

front/py/deepx/nn/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from .deepxir import *
2-
2+
from .modules import __all__ as _modules_all
33
__all__ = [
4-
"DeepxIR","DeepxIRResp"
4+
"DeepxIR","DeepxIRResp",
5+
*_modules_all
56
]

front/py/deepx/transformer/modeling_rope_utils.py

Lines changed: 24 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,54 +1,46 @@
1-
from typing import Tuple
1+
from typing import Tuple,Optional
22
import math
3+
from deepx.utils import Config
34
from deepx import arange,Tensor,where
45

5-
def _compute_default_rope_parameters(config:dict={
6-
"rope_theta":10000.0,
7-
"head_dim":0,
8-
"partial_rotary_factor":1.0,
9-
}) -> Tuple[Tensor, float]:
10-
partial_rotary_factor = config.get("partial_rotary_factor", 1.0)
11-
dim = config["head_dim"]* partial_rotary_factor
12-
# 计算逆频率
13-
base=config["rope_theta"]
14-
inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype='float64')/ dim))
15-
return inv_freq, 1.0
6+
def _compute_default_rope_parameters(config:Config=None,seq_len: Optional[int] = None, **rope_kwargs) -> Tuple[Tensor, float]:
7+
if len(rope_kwargs) > 0:
8+
base = rope_kwargs["base"]
9+
dim = rope_kwargs["dim"]
10+
elif config is not None:
11+
base = config.rope_theta
12+
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
13+
head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
14+
dim = int(head_dim * partial_rotary_factor)
15+
16+
attention_factor = 1.0 # Unused in this type of RoPE
17+
18+
# Compute the inverse frequencies
19+
inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype="int64").float() / dim))
20+
return inv_freq, attention_factor
1621

17-
def _compute_llama3_parameters(config:dict={
18-
"rope_theta":10000.0,
19-
"head_dim":0,
20-
"partial_rotary_factor":1.0,
21-
"factor":8,
22-
"low_freq_factor":1,
23-
"high_freq_factor":4,
24-
"old_context_len":8192,
25-
"seq_len":None
26-
}) -> Tuple[Tensor, float]:
22+
def _compute_llama3_parameters(config:Config,seq_len: Optional[int] = None,**rope_kwargs) -> Tuple[Tensor, float]:
2723
# Gets the default RoPE parameters
28-
inv_freq, attention_factor = _compute_default_rope_parameters(config)
24+
inv_freq, attention_factor = _compute_default_rope_parameters(config, seq_len, **rope_kwargs)
2925

30-
factor = config["rope_scaling"]["factor"] # `8` in the original implementation
31-
low_freq_factor = config["rope_scaling"]["low_freq_factor"] # `1` in the original implementation
32-
high_freq_factor = config["rope_scaling"]["high_freq_factor"] # `4` in the original implementation
33-
old_context_len = config["rope_scaling"]["original_max_position_embeddings"] # `8192` in the original implementation
26+
factor = config.rope_scaling["factor"] # `8` in the original implementation
27+
low_freq_factor = config.rope_scaling["low_freq_factor"] # `1` in the original implementation
28+
high_freq_factor = config.rope_scaling["high_freq_factor"] # `4` in the original implementation
29+
old_context_len = config.rope_scaling["original_max_position_embeddings"] # `8192` in the original implementation
3430

3531
low_freq_wavelen = old_context_len / low_freq_factor
3632
high_freq_wavelen = old_context_len / high_freq_factor
3733

3834
wavelen = 2 * math.pi / inv_freq
39-
wavelen.print()
4035
# wavelen < high_freq_wavelen: do nothing
4136
# wavelen > low_freq_wavelen: divide by factor
4237
inv_freq_llama = where(wavelen > low_freq_wavelen, inv_freq / factor, inv_freq)
4338
# otherwise: interpolate between the two, using a smooth factor
4439
smooth_factor = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
4540
smoothed_inv_freq = (1 - smooth_factor) * inv_freq_llama / factor + smooth_factor * inv_freq_llama
4641
is_medium_freq = ~(wavelen < high_freq_wavelen) * ~(wavelen > low_freq_wavelen)
47-
is_medium_freq.print()
48-
# TODO 这一步执行后,会导致an illegal memory access was encountered
4942
inv_freq_llama = where(is_medium_freq, smoothed_inv_freq, inv_freq_llama)
50-
is_medium_freq.print()
51-
inv_freq_llama.print()
43+
5244
return inv_freq_llama, attention_factor
5345

5446
ROPE_INIT_FUNCTIONS = {

front/py/deepx/transformer/models/llama/attention.py

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
from typing import Optional,Tuple
2-
from deepx.nn.modules import Module,Linear
2+
from deepx import nn
33
from deepx import Tensor,matmul,softmax,cat,dropout as dropout_func
4+
from deepx.nn.modules import Module
5+
from deepx.utils import Config
46

57

68

@@ -52,7 +54,9 @@ def eager_attention_forward(
5254
return attn_output, attn_weights
5355

5456
class LlamaAttention(Module):
55-
def __init__(self, config:dict, layer_idx: int):
57+
"""Multi-headed attention from 'Attention Is All You Need' paper"""
58+
59+
def __init__(self, config: Config, layer_idx: int):
5660
super().__init__()
5761
self.config = config
5862
self.layer_idx = layer_idx
@@ -62,19 +66,20 @@ def __init__(self, config:dict, layer_idx: int):
6266
self.attention_dropout = config.attention_dropout
6367
self.is_causal = True
6468

65-
self.q_proj = Linear(
69+
self.q_proj = nn.Linear(
6670
config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.attention_bias
6771
)
68-
self.k_proj = Linear(
72+
self.k_proj = nn.Linear(
6973
config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
7074
)
71-
self.v_proj = Linear(
75+
self.v_proj = nn.Linear(
7276
config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
7377
)
74-
self.o_proj = Linear(
78+
self.o_proj = nn.Linear(
7579
config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
7680
)
7781

82+
7883
def forward(
7984
self,
8085
hidden_states: Tensor,
@@ -90,17 +95,16 @@ def forward(
9095

9196
cos, sin = position_embeddings
9297
query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
93-
94-
95-
attn_output, attn_weights = attention_interface(
98+
99+
100+
attn_output, attn_weights =eager_attention_forward(
96101
self,
97102
query_states,
98103
key_states,
99104
value_states,
100105
attention_mask,
101-
dropout=0.0 if not self.training else self.attention_dropout,
102106
scaling=self.scaling,
103-
**kwargs,
107+
dropout=0.0 if not self.training else self.attention_dropout
104108
)
105109

106110
attn_output = attn_output.reshape(*input_shape, -1)

front/py/deepx/transformer/models/llama/embedding.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
from deepx.nn.modules import Module
2-
from deepx import Tensor,concat
2+
from deepx import cat
33
from deepx.transformer.modeling_rope_utils import ROPE_INIT_FUNCTIONS
4-
4+
from deepx.utils import Config
55
# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
66
class LlamaRotaryEmbedding(Module):
7-
def __init__(self,config:dict):
7+
def __init__(self,config:Config):
88
super().__init__()
99
# 最大序列长度
10-
self.max_seq_len_cached = config["max_position_embeddings"]
10+
self.max_seq_len_cached = config.max_position_embeddings
1111
# 原始最大序列长度
12-
self.original_max_seq_len = config["max_position_embeddings"]
12+
self.original_max_seq_len = config.max_position_embeddings
1313
# 旋转类型
14-
self.rope_type=config["rope_scaling"]["rope_type"]
14+
self.rope_type=config.rope_scaling.rope_type
1515
# 旋转初始化函数
1616
self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
1717
# 旋转初始化函数
@@ -39,7 +39,7 @@ def __init__(self,config:dict):
3939

4040
def forward(self, x, position_ids):
4141
# 扩展旋转频率
42-
inv_freq_expanded = self.inv_freq[None, :, None].todtype('float32').expand((position_ids.shape[0], -1, 1))
42+
inv_freq_expanded = self.inv_freq[None, :, None].float().expand((position_ids.shape[0], -1, 1))
4343

4444
# 使用torch.unsqueeze和type转换替代索引操作
4545
position_ids_expanded = position_ids[:, None, :].float()
@@ -48,7 +48,7 @@ def forward(self, x, position_ids):
4848
# 计算频率
4949
freqs = (inv_freq_expanded @ position_ids_expanded).T
5050
# 拼接频率
51-
emb = concat((freqs, freqs), dim=-1)
51+
emb = cat((freqs, freqs), dim=-1)
5252
# 计算余弦和正弦
5353
cos = emb.cos()
5454
sin = emb.sin()

front/py/deepx/transformer/models/llama/groupedquery_attention.py

Lines changed: 0 additions & 12 deletions
This file was deleted.

front/py/deepx/utils/__init__.py

Lines changed: 3 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,5 @@
1-
from .tensor import Tensor
2-
from .creation import zeros, ones, arange
3-
from .elementwise import add, sub, mul, div
4-
from .matmul import matmul, dot
5-
from .reduction import sum, mean, max, min
6-
from .shape import reshape, transpose
7-
from .comparison import lt, gt, eq
8-
from .trigonometric import sin, cos, tan
1+
from .config import Config
92

103
__all__ = [
11-
'Tensor',
12-
'zeros', 'ones', 'arange',
13-
'add', 'sub', 'mul', 'div',
14-
'matmul', 'dot',
15-
'sum', 'mean', 'max', 'min',
16-
'reshape', 'transpose',
17-
'lt', 'gt', 'eq',
18-
'sin', 'cos', 'tan'
19-
]
4+
'Config',
5+
]

0 commit comments

Comments
 (0)