From 162767df1e74588c4ccd1c7bedc96f7dca475197 Mon Sep 17 00:00:00 2001 From: Yang Kun <193369907+nukyan@users.noreply.github.com> Date: Mon, 23 Mar 2026 17:12:00 +0800 Subject: [PATCH] Fix several wrongs --- .../fast_io_core_impl/dynamic_output_buffer.h | 14 ++- include/fast_io_core_impl/read_all.h | 94 ------------------- include/fast_io_core_impl/simd_find.h | 4 +- include/fast_io_crypto/cipher/aes.h | 44 ++++++++- include/fast_io_i18n/lc_numbers/cond.h | 5 +- 5 files changed, 57 insertions(+), 104 deletions(-) diff --git a/include/fast_io_core_impl/dynamic_output_buffer.h b/include/fast_io_core_impl/dynamic_output_buffer.h index c35d5a263..4d054fd66 100644 --- a/include/fast_io_core_impl/dynamic_output_buffer.h +++ b/include/fast_io_core_impl/dynamic_output_buffer.h @@ -67,9 +67,17 @@ write_all_overflow_define_impl(basic_generic_dynamic_output_buffer(bob.end_ptr - bob.begin_ptr)}; ::std::size_t rlsz{static_cast<::std::size_t>(bob.curr_ptr - bob.begin_ptr)}; ::std::size_t diff{static_cast<::std::size_t>(last - first)}; - ::std::size_t to_allocate{bfsz + diff}; - ::std::size_t twicebfsz; constexpr ::std::size_t mx{::std::numeric_limits<::std::size_t>::max()}; + ::std::size_t to_allocate; + if (bfsz > mx - diff) + { + to_allocate = mx; + } + else + { + to_allocate = bfsz + diff; + } + ::std::size_t twicebfsz; constexpr ::std::size_t mxdv2{mx >> 1u}; if (bfsz > mxdv2) { @@ -77,7 +85,7 @@ write_all_overflow_define_impl(basic_generic_dynamic_output_buffer -inline output_iter type_punning_copy(char_type const* first,char_type const* last,output_iter result) -{ - using value_type = ::std::iter_value_t; - if constexpr(sizeof(value_type)==0) - return result; - else if constexpr(sizeof(value_type)==sizeof(char_type)) - { - for(;first!=last;) - { - if constexpr(::std::same_as) - *result=*first; - else - { - my_memcpy(__builtin_addressof(result),first,sizeof(value_type)); - } - ++first; - ++result; - } - return result; - } - else - { - static_assert(sizeof(char_type)==1); - for(;first!=last;) - { - my_memcpy(__builtin_addressof(result),first,sizeof(value_type)); - first+=sizeof(value_type); - ++result; - } - return result; - } -} -template<::fast_io::input_stream input,::std::forward_iterator Iter> -inline constexpr void read_all_impl_none_contiguous(input in,Iter first,Iter last) -{ - using char_type = typename input::char_type; - using iter_value_type = ::std::iter_value_t; - if constexpr(::fast_io::buffer_input_stream) - { - auto to_read{::std::distance(first,last)}; - for(;to_read;) - { - auto curr{ibuffer_curr(in)}; - auto ed{ibuffer_end(in)}; - auto remains{ed-curr}; - if constexpr(sizeof(char_type)==sizeof(iter_value_type)) - { - if(to_read(to_read)*sizeof(iter_value_type)}; - if(real_to_read=buffer_size) - { - - } - } - } -} -#endif template <::fast_io::input_stream input> inline constexpr void read_all_impl_decay(input in, typename input::char_type *first, typename input::char_type *last) { diff --git a/include/fast_io_core_impl/simd_find.h b/include/fast_io_core_impl/simd_find.h index 3db9d3393..45938f290 100644 --- a/include/fast_io_core_impl/simd_find.h +++ b/include/fast_io_core_impl/simd_find.h @@ -7,7 +7,7 @@ inline constexpr auto create_find_simd_vector_with_unsigned_toggle(char_type val { ::fast_io::freestanding::array arr; using signed_char_type = ::std::make_signed_t; - using unsigned_char_type = ::std::make_signed_t; + using unsigned_char_type = ::std::make_unsigned_t; constexpr unsigned_char_type signed_min_unsigned_val{ static_cast(::std::numeric_limits::min())}; if constexpr (signed_disposition) @@ -213,7 +213,7 @@ inline constexpr char_type const *find_space_simd_common_impl(char_type const *f else { using unsigned_char_type = ::std::make_unsigned_t<::std::remove_cvref_t>; - using signed_char_type = ::std::make_unsigned_t; + using signed_char_type = ::std::make_signed_t; constexpr char_type spacech{char_literal_v>}; constexpr char_type horizontaltab{char_literal_v>}; constexpr char_type verticaltab{char_literal_v>}; diff --git a/include/fast_io_crypto/cipher/aes.h b/include/fast_io_crypto/cipher/aes.h index da686656d..f3b0159b3 100644 --- a/include/fast_io_crypto/cipher/aes.h +++ b/include/fast_io_crypto/cipher/aes.h @@ -58,7 +58,7 @@ struct aes { inline static constexpr ::std::size_t block_size = 16; inline static constexpr ::std::size_t key_size = keysize; - inline static constexpr ::std::size_t key_schedule_size = keysize == 16 ? 10 : (keysize == 24 ? 12 : 15); + inline static constexpr ::std::size_t key_schedule_size = keysize == 16 ? 11 : (keysize == 24 ? 13 : 15); __m128i key_schedule[key_schedule_size]; inline explicit aes(::std::span<::std::byte const, key_size> key_span) noexcept { @@ -135,9 +135,49 @@ struct aes key_schedule[13] = aes_256_key_exp_2(key_schedule[11], key_schedule[12]); key_schedule[14] = aes_256_key_exp(key_schedule[12], key_schedule[13], 0x40); } + if constexpr (decrypt) + { + // Prepare decryption key schedule: reverse order and apply InvMixColumns + for (::std::size_t i{}, j{key_schedule_size - 1}; i < j; ++i, --j) + { + __m128i tmp = key_schedule[i]; + key_schedule[i] = key_schedule[j]; + key_schedule[j] = tmp; + } + for (::std::size_t i{1}; i < key_schedule_size - 1; ++i) + { + key_schedule[i] = _mm_aesimc_si128(key_schedule[i]); + } + } } inline void operator()(::std::byte const *from, ::std::size_t blocks, ::std::byte *to) noexcept - {} + { + constexpr ::std::size_t rounds = key_schedule_size - 1; + for (::std::size_t i{}; i != blocks; ++i) + { + __m128i block = _mm_loadu_si128(reinterpret_cast<__m128i const *>(from)); + block = _mm_xor_si128(block, key_schedule[0]); + if constexpr (!decrypt) + { + for (::std::size_t j{1}; j != rounds; ++j) + { + block = _mm_aesenc_si128(block, key_schedule[j]); + } + block = _mm_aesenclast_si128(block, key_schedule[rounds]); + } + else + { + for (::std::size_t j{1}; j != rounds; ++j) + { + block = _mm_aesdec_si128(block, key_schedule[j]); + } + block = _mm_aesdeclast_si128(block, key_schedule[rounds]); + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(to), block); + from += block_size; + to += block_size; + } + } }; } // namespace fast_io diff --git a/include/fast_io_i18n/lc_numbers/cond.h b/include/fast_io_i18n/lc_numbers/cond.h index bd86cb58a..95a9b8633 100644 --- a/include/fast_io_i18n/lc_numbers/cond.h +++ b/include/fast_io_i18n/lc_numbers/cond.h @@ -102,12 +102,11 @@ inline constexpr char_type *cond_lc_print_reserve_define_impl(basic_lc_all) { - constexpr ::std::size_t sz{print_reserve_size(io_reserve_type)}; - return sz; + return print_reserve_define(io_reserve_type, iter, c); } else { - return print_reserve_size(io_reserve_type, c); + return print_reserve_define(io_reserve_type, iter, c); } }