array2d
diff --git a/‎excuter/cpp-common/src/stdutil/fs.cpp‎
Lines changed: 6 additions & 1 deletion b/‎excuter/cpp-common/src/stdutil/fs.cpp‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp‎
Lines changed: 6 additions & 0 deletions b/‎excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎excuter/op-mem-cuda/src/deepx/tf/io.hpp‎
Lines changed: 58 additions & 14 deletions b/‎excuter/op-mem-cuda/src/deepx/tf/io.hpp‎
Lines changed: 58 additions & 14 deletions
diff --git a/‎front/py/deepx/__init__.py‎
Lines changed: 3 additions & 2 deletions b/‎front/py/deepx/__init__.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎front/py/deepx/nn/__init__.py‎
Lines changed: 3 additions & 2 deletions b/‎front/py/deepx/nn/__init__.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎front/py/deepx/transformer/modeling_rope_utils.py‎
Lines changed: 24 additions & 32 deletions b/‎front/py/deepx/transformer/modeling_rope_utils.py‎
Lines changed: 24 additions & 32 deletions
diff --git a/‎front/py/deepx/transformer/models/llama/attention.py‎
Lines changed: 15 additions & 11 deletions b/‎front/py/deepx/transformer/models/llama/attention.py‎
Lines changed: 15 additions & 11 deletions
diff --git a/‎front/py/deepx/transformer/models/llama/embedding.py‎
Lines changed: 8 additions & 8 deletions b/‎front/py/deepx/transformer/models/llama/embedding.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎front/py/deepx/transformer/models/llama/groupedquery_attention.py‎
Lines changed: 0 additions & 12 deletions b/‎front/py/deepx/transformer/models/llama/groupedquery_attention.py‎
Lines changed: 0 additions & 12 deletions
diff --git a/‎front/py/deepx/utils/__init__.py‎
Lines changed: 3 additions & 17 deletions b/‎front/py/deepx/utils/__init__.py‎
Lines changed: 3 additions & 17 deletions
@@ -17,9 +17,14 @@ namespace stdutil
 
     void save(const byte *data, size_t size, const string &path)
     {
-
+        // Writes `size` bytes starting at `data` to `path` in binary mode,
+        // truncating any existing file. Throws std::runtime_error when the
+        // file cannot be opened or when the stream reports a failure.
         ofstream ofs(path, ios::binary | ios::out | ios::trunc);
+        if (!ofs.is_open()) {
+            throw std::runtime_error("Failed to open file for writing: " + path);
+        }
-        ofs.write(reinterpret_cast<const char *>(data), size);
+        ofs.write(reinterpret_cast<const char *>(data), static_cast<std::streamsize>(size));
         ofs.close();
+        // Check the stream only after close(): ofstream output is buffered, so
+        // a failure may not surface until the buffer is flushed. close() keeps
+        // any error bits set by write() and adds failbit if closing itself
+        // fails, so this single check covers both cases.
+        if (!ofs) {
+            throw std::runtime_error("Failed to write data to file: " + path);
+        }
     }
 
 
@@ -54,6 +54,12 @@ namespace deepx::tf
             case Precision::Float32:
                 reshape<Author, float>(*mem->gettensor<float>(this->args[0].textvalue), shape, *mem->gettensor<float>(this->returns[0].textvalue));
                 break;
+            case Precision::Float16:
+                reshape<Author, half>(*mem->gettensor<half>(this->args[0].textvalue), shape, *mem->gettensor<half>(this->returns[0].textvalue));
+                break;
+            case Precision::BFloat16:
+                reshape<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), shape, *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
+                break;               
             case Precision::Int64:
                 reshape<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), shape, *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
 
@@ -24,24 +24,68 @@ namespace deepx::tf
         int run(shared_ptr<MemBase> mem, string &error) override
         {
+            // Prints the tensor named by args[0]. args[1], when present, is
+            // passed to tensorfunc::print as the format string. Returns 0 on
+            // success, or 1 with `error` set when the tensor does not exist
+            // or its dtype has no print branch in the switch below.
             string name = this->args[0].textvalue;
-            if (mem->existstensor(name))
-            {
-                auto t = mem->gettensor(name);
-                if (this->args.size() == 1)
-                {
-                    tensorfunc::print<Author, void>(*t);
-                }
-                else
-                {
-                    tensorfunc::print<Author, void>(*t, this->args[1].textvalue);
-                }
-            }
-            else
-            {
+            if (!mem->existstensor(name))
+            {
                 std::cerr << "print " << name << " not found" << std::endl;
                 error = "print " + name + " not found";
                 return 1;
             }
+            string format = "";
+            if (this->args.size() > 1)
+            {
+                format = this->args[1].textvalue;
+            }
+
+            // Dispatch on the stored dtype so the tensor is fetched and
+            // printed with its concrete element type.
+            Precision dtype = mem->gettensor(name)->shape.dtype;
+            switch (dtype)
+            {
+            case Precision::Float64:{
+                auto t = mem->gettensor<double>(name);
+                tensorfunc::print<Author,double>(*t,format);
+                break;
+            }
+            case Precision::Float32:{
+                auto t = mem->gettensor<float>(name);
+                tensorfunc::print<Author>(*t,format);
+                break;
+            }
+            case Precision::Float16:{
+                auto t = mem->gettensor<half>(name);
+                tensorfunc::print<Author>(*t,format);
+                break;
+            }
+            case Precision::BFloat16:{
+                auto t = mem->gettensor<nv_bfloat16>(name);
+                tensorfunc::print<Author>(*t,format);
+                break;
+            }
+            case Precision::Int64:{
+                auto t = mem->gettensor<int64_t>(name);
+                tensorfunc::print<Author>(*t,format);
+                break;
+            }
+            case Precision::Int32:{
+                auto t = mem->gettensor<int32_t>(name);
+                tensorfunc::print<Author>(*t,format);
+                break;
+            }
+            case Precision::Int16:{
+                auto t = mem->gettensor<int16_t>(name);
+                tensorfunc::print<Author>(*t,format);
+                break;
+            }
+            case Precision::Int8:{
+                auto t = mem->gettensor<int8_t>(name);
+                tensorfunc::print<Author>(*t,format);
+                break;
+            }
+            case Precision::Bool:{
+                auto t = mem->gettensor<bool>(name);
+                tensorfunc::print<Author,bool>(*t,format);
+                break;
+            }
+            default:
+                // Report unsupported dtypes instead of returning success
+                // without having printed anything.
+                std::cerr << "print " << name << " unsupported dtype" << std::endl;
+                error = "print " + name + " unsupported dtype";
+                return 1;
+            }
             return 0;
         }
 
 
@@ -1,11 +1,12 @@
 from .tensor import Tensor,Shape,Number
 from deepx.nn.functional import *  # 导入所有functional函数
 from deepx.nn.functional import __all__ as _func_all  # 获取functional的导出列表
-
+from deepx.utils import __all__ as _utils_all  # 获取utils的导出列表
 __all__ = [
     #tensor
     'Tensor','Shape','Number',
-    *_func_all
+    *_func_all,
+    *_utils_all,
 ]
 
 # 为了支持 import deepx as dx 的用法
 
@@ -1,5 +1,6 @@
 from .deepxir import *
-
+from .modules import __all__ as _modules_all
 __all__ = [
-    "DeepxIR","DeepxIRResp"
+    "DeepxIR","DeepxIRResp",
+    *_modules_all
     ]
@@ -1,54 +1,46 @@
-from typing import   Tuple
+from typing import   Tuple,Optional
 import math
+from deepx.utils import Config
 from deepx import arange,Tensor,where
 
-def _compute_default_rope_parameters(config:dict={
-    "rope_theta":10000.0,
-    "head_dim":0,
-    "partial_rotary_factor":1.0,
-}) -> Tuple[Tensor, float]:
-    partial_rotary_factor = config.get("partial_rotary_factor", 1.0)
-    dim   = config["head_dim"]* partial_rotary_factor
-    # 计算逆频率
-    base=config["rope_theta"]
-    inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype='float64')/ dim))
-    return inv_freq, 1.0
+def _compute_default_rope_parameters(config:Optional[Config]=None,seq_len: Optional[int] = None, **rope_kwargs) -> Tuple[Tensor, float]:
+    """
+    Compute the inverse frequencies for the default (unscaled) RoPE.
+
+    Args:
+        config: Model configuration, read only when no `rope_kwargs` are given.
+            `rope_theta` is required; `partial_rotary_factor` defaults to 1.0 when
+            the attribute is absent; `head_dim` falls back to
+            `hidden_size // num_attention_heads` when it is missing or falsy.
+        seq_len: Not used by this function.
+        **rope_kwargs: When non-empty, takes precedence over `config` and must
+            contain the keys `base` and `dim`.
+
+    Returns:
+        Tuple `(inv_freq, attention_factor)`; `attention_factor` is always 1.0.
+
+    Raises:
+        ValueError: If neither `rope_kwargs` nor `config` is provided.
+    """
+    if len(rope_kwargs) > 0:
+        base = rope_kwargs["base"]
+        dim = rope_kwargs["dim"]
+    elif config is not None:
+        base = config.rope_theta
+        partial_rotary_factor = getattr(config, "partial_rotary_factor", 1.0)
+        head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
+        dim = int(head_dim * partial_rotary_factor)
+    else:
+        # Without this branch `base` and `dim` would be unbound below and the
+        # call would fail with an unhelpful UnboundLocalError.
+        raise ValueError(
+            "_compute_default_rope_parameters requires either `config` or the "
+            "`base` and `dim` keyword arguments"
+        )
+
+    attention_factor = 1.0  # Unused in this type of RoPE
+
+    # Compute the inverse frequencies
+    inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype="int64").float() / dim))
+    return inv_freq, attention_factor
 
-def _compute_llama3_parameters(config:dict={
-    "rope_theta":10000.0,
-    "head_dim":0,
-    "partial_rotary_factor":1.0,
-    "factor":8,
-    "low_freq_factor":1,
-    "high_freq_factor":4,
-    "old_context_len":8192,
-    "seq_len":None
-}) -> Tuple[Tensor, float]:
+def _compute_llama3_parameters(config:Config,seq_len: Optional[int] = None,**rope_kwargs) -> Tuple[Tensor, float]:
+    """
+    Compute RoPE inverse frequencies with llama3-style frequency scaling.
+
+    Starts from the result of `_compute_default_rope_parameters` and rescales
+    it per frequency band using the values in `config.rope_scaling`.
+
+    Args:
+        config: Model configuration. `config.rope_scaling` is always read and
+            must provide `factor`, `low_freq_factor`, `high_freq_factor` and
+            `original_max_position_embeddings`, even when `rope_kwargs` is given.
+        seq_len: Forwarded unchanged to `_compute_default_rope_parameters`.
+        **rope_kwargs: Forwarded unchanged to `_compute_default_rope_parameters`.
+
+    Returns:
+        Tuple `(inv_freq_llama, attention_factor)`, where `attention_factor` is
+        the value returned by `_compute_default_rope_parameters`.
+    """
     # Gets the default RoPE parameters
-    inv_freq, attention_factor = _compute_default_rope_parameters(config)
+    inv_freq, attention_factor = _compute_default_rope_parameters(config, seq_len, **rope_kwargs)
 
-    factor = config["rope_scaling"]["factor"]  # `8` in the original implementation
-    low_freq_factor = config["rope_scaling"]["low_freq_factor"]  # `1` in the original implementation
-    high_freq_factor = config["rope_scaling"]["high_freq_factor"]  # `4` in the original implementation
-    old_context_len = config["rope_scaling"]["original_max_position_embeddings"]  # `8192` in the original implementation
+    factor = config.rope_scaling["factor"]  # `8` in the original implementation
+    low_freq_factor = config.rope_scaling["low_freq_factor"]  # `1` in the original implementation
+    high_freq_factor = config.rope_scaling["high_freq_factor"]  # `4` in the original implementation
+    old_context_len = config.rope_scaling["original_max_position_embeddings"]  # `8192` in the original implementation
 
     low_freq_wavelen = old_context_len / low_freq_factor
     high_freq_wavelen = old_context_len / high_freq_factor
 
     wavelen = 2 * math.pi / inv_freq
-    wavelen.print()
     # wavelen < high_freq_wavelen: do nothing
     # wavelen > low_freq_wavelen: divide by factor
     inv_freq_llama = where(wavelen > low_freq_wavelen, inv_freq / factor, inv_freq)
     # otherwise: interpolate between the two, using a smooth factor
     smooth_factor = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
     smoothed_inv_freq = (1 - smooth_factor) * inv_freq_llama / factor + smooth_factor * inv_freq_llama
+    # The medium band is everything that is neither high-frequency nor low-frequency.
     is_medium_freq = ~(wavelen < high_freq_wavelen) * ~(wavelen > low_freq_wavelen)
-    is_medium_freq.print()
-    # TODO 这一步执行后，会导致an illegal memory access was encountered
+    # Only medium-band entries take the interpolated value; the rest keep inv_freq_llama.
     inv_freq_llama =  where(is_medium_freq, smoothed_inv_freq, inv_freq_llama)
-    is_medium_freq.print()
-    inv_freq_llama.print()
+
     return inv_freq_llama, attention_factor
 
 ROPE_INIT_FUNCTIONS = {
 
@@ -1,6 +1,8 @@
 from typing import Optional,Tuple
-from deepx.nn.modules import Module,Linear
+from deepx import nn
 from deepx import Tensor,matmul,softmax,cat,dropout as dropout_func
+from deepx.nn.modules import Module
+from deepx.utils import Config
 
 
 
@@ -52,7 +54,9 @@ def eager_attention_forward(
     return attn_output, attn_weights
 
 class LlamaAttention(Module):
-    def __init__(self, config:dict, layer_idx: int):
+    """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+    def __init__(self, config: Config, layer_idx: int):
         super().__init__()
         self.config = config
         self.layer_idx = layer_idx
@@ -62,19 +66,20 @@ def __init__(self, config:dict, layer_idx: int):
         self.attention_dropout = config.attention_dropout
         self.is_causal = True
 
-        self.q_proj = Linear(
+        self.q_proj = nn.Linear(
             config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.attention_bias
         )
-        self.k_proj = Linear(
+        self.k_proj = nn.Linear(
             config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
         )
-        self.v_proj = Linear(
+        self.v_proj = nn.Linear(
             config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
         )
-        self.o_proj = Linear(
+        self.o_proj = nn.Linear(
             config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
         )
 
+
     def forward(
         self,
         hidden_states:  Tensor,
@@ -90,17 +95,16 @@ def forward(
 
         cos, sin = position_embeddings
         query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
- 
- 
-        attn_output, attn_weights = attention_interface(
+
+
+        attn_output, attn_weights =eager_attention_forward(
             self,
             query_states,
             key_states,
             value_states,
             attention_mask,
-            dropout=0.0 if not self.training else self.attention_dropout,
             scaling=self.scaling,
-            **kwargs,
+            dropout=0.0 if not self.training else self.attention_dropout
         )
 
         attn_output = attn_output.reshape(*input_shape, -1)
 
@@ -1,17 +1,17 @@
 from deepx.nn.modules import Module
-from deepx import Tensor,concat
+from deepx import  cat
 from deepx.transformer.modeling_rope_utils import ROPE_INIT_FUNCTIONS
-
+from deepx.utils import Config
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
 class LlamaRotaryEmbedding(Module):
-    def __init__(self,config:dict):
+    def __init__(self,config:Config):
         super().__init__()
         # 最大序列长度
-        self.max_seq_len_cached = config["max_position_embeddings"]
+        self.max_seq_len_cached = config.max_position_embeddings
         # 原始最大序列长度
-        self.original_max_seq_len = config["max_position_embeddings"]
+        self.original_max_seq_len = config.max_position_embeddings
         # 旋转类型
-        self.rope_type=config["rope_scaling"]["rope_type"]
+        self.rope_type=config.rope_scaling.rope_type
         # 旋转初始化函数
         self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
         # 旋转初始化函数
@@ -39,7 +39,7 @@ def __init__(self,config:dict):
 
     def forward(self, x, position_ids):
         # 扩展旋转频率
-        inv_freq_expanded = self.inv_freq[None, :, None].todtype('float32').expand((position_ids.shape[0], -1, 1))
+        inv_freq_expanded = self.inv_freq[None, :, None].float().expand((position_ids.shape[0], -1, 1))
 
         # 使用torch.unsqueeze和type转换替代索引操作
         position_ids_expanded = position_ids[:, None, :].float()
@@ -48,7 +48,7 @@ def forward(self, x, position_ids):
         # 计算频率
         freqs = (inv_freq_expanded @ position_ids_expanded).T
         # 拼接频率
-        emb = concat((freqs, freqs), dim=-1)
+        emb = cat((freqs, freqs), dim=-1)
         # 计算余弦和正弦
         cos = emb.cos()
         sin = emb.sin()
 
@@ -1,19 +1,5 @@
-from .tensor import Tensor
-from .creation import zeros, ones, arange
-from .elementwise import add, sub, mul, div
-from .matmul import matmul, dot
-from .reduction import sum, mean, max, min
-from .shape import reshape, transpose
-from .comparison import lt, gt, eq
-from .trigonometric import sin, cos, tan
+from .config import Config
 
 __all__ = [
-    'Tensor',
-    'zeros', 'ones', 'arange',
-    'add', 'sub', 'mul', 'div',
-    'matmul', 'dot',
-    'sum', 'mean', 'max', 'min',
-    'reshape', 'transpose',
-    'lt', 'gt', 'eq',
-    'sin', 'cos', 'tan'
-] 
+    'Config',
+    ]
Original file line number	Diff line number	Diff line change
`@@ -17,9 +17,14 @@ namespace stdutil`
`17`	`17`
`18`	`18`	`void save(const byte *data, size_t size, const string &path)`
`19`	`19`	`{`
`20`		`-`
`21`	`20`	`ofstream ofs(path, ios::binary \| ios::out \| ios::trunc);`
	`21`	`+ if (!ofs.is_open()) {`
	`22`	`+ throw std::runtime_error("Failed to open file for writing: " + path);`
	`23`	`+ }`
`22`	`24`	`ofs.write(reinterpret_cast<const char *>(data), size);`
	`25`	`+ if (!ofs) {`
	`26`	`+ throw std::runtime_error("Failed to write data to file: " + path);`
	`27`	`+ }`
`23`	`28`	`ofs.close();`
`24`	`29`	`}`
`25`	`30`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,6 @@`
`1`	`1`	`from .deepxir import *`
`2`		`-`
	`2`	`+from .modules import __all__ as _modules_all`
`3`	`3`	`__all__ = [`
`4`		`- "DeepxIR","DeepxIRResp"`
	`4`	`+ "DeepxIR","DeepxIRResp",`
	`5`	`+ *_modules_all`
`5`	`6`	`]`