@@ -1791,10 +1791,10 @@ class BitNetQuantConfig(QuantizationConfigMixin):
1791 1791     In `offline` mode, quantization parameters are pre-calculated *before* inference.
1792 1792     These parameters are then fixed and loaded into the quantized model. This
1793 1793     generally results in lower runtime overhead compared to online quantization.
1794- use_rms_norm (`bool`, *optional*, defaults to `False` ):
1794+ use_rms_norm (`bool`, *optional*):
1795 1795     Whether to apply RMSNorm on the activations before quantization. This matches the original BitNet paper's approach
1796 1796     of normalizing activations before quantization/packing.
1797- rms_norm_eps (`float`, *optional*, defaults to 1e-6 ):
1797+ rms_norm_eps (`float`, *optional*):
1798 1798     The epsilon value used in the RMSNorm layer for numerical stability.
1799 1799     kwargs (`Dict[str, Any]`, *optional*):
1800 1800     Additional keyword arguments that may be used by specific quantization
@@ -1806,8 +1806,8 @@ def __init__(
1806 1806     modules_to_not_convert : Optional [List ] = None ,
1807 1807     linear_class : Optional [str ] = "bitlinear" ,
1808 1808     quantization_mode : Optional [str ] = "offline" ,
1809- use_rms_norm : bool = False ,
1810- rms_norm_eps : float = 1e-6 ,
1809+ use_rms_norm : bool = None ,
1810+ rms_norm_eps : float = None ,
1811 1811     ** kwargs ,
1812 1812     ):
1813 1813     if linear_class not in ["bitlinear" , "autobitlinear" ]:
0 commit comments