diff --git a/Documentation/changelog.md b/Documentation/changelog.md index d0d5eb04..2a5b7e78 100644 --- a/Documentation/changelog.md +++ b/Documentation/changelog.md @@ -11,6 +11,7 @@ Bug fixes: - 2026.1.2 update: fix x-angle in beam specification - 2026.1.3: further fixes for beam view glitches for long renders - 2026.1.4: fix addPulse sequence function +- 2026.1.5: fix loading of waveforms Programming things: - The CPU FFT library was switched to PocketFFT. This leads to much better performance on the AMD systems I tested; slightly worse on Intel. However, it also makes maintenance and compiling much easier. diff --git a/Source/Devices/LWEActiveDeviceCPU.h b/Source/Devices/LWEActiveDeviceCPU.h index 7491448e..ec1b7d7a 100644 --- a/Source/Devices/LWEActiveDeviceCPU.h +++ b/Source/Devices/LWEActiveDeviceCPU.h @@ -247,12 +247,12 @@ class CPUDevice : public LWEDevice { #endif int deviceCalloc(void** ptr, const size_t N, const size_t elementSize) override { const size_t request = N*elementSize; - const size_t standard_alignment = 2*sizeof(deviceFP); - const size_t bytes = (request % standard_alignment) ? standard_alignment * (request / standard_alignment) + standard_alignment : request; + const size_t standard_alignment = 64; + const size_t bytes = standard_alignment * ((request + standard_alignment - 1)/standard_alignment); #ifndef _WIN32 *ptr = std::aligned_alloc(standard_alignment, bytes); #else - *ptr = std::malloc(bytes); + *ptr = _aligned_malloc(bytes, standard_alignment); #endif if(*ptr == nullptr) return 1; std::memset(*ptr, 0, bytes); @@ -266,7 +266,11 @@ class CPUDevice : public LWEDevice { } void deviceFree(void* block) override { - std::free(block); + #ifndef _WIN32 + std::free(block); + #else + _aligned_free(block); + #endif } inline bool isTheCanaryPixelNaN(const deviceFP* canaryPointer) { diff --git a/Source/LightwaveExplorerUtilities.cpp b/Source/LightwaveExplorerUtilities.cpp index 6446c040..432609b9 100644 --- a/Source/LightwaveExplorerUtilities.cpp +++ b/Source/LightwaveExplorerUtilities.cpp @@ -331,14 +331,14 @@ int loadWaveformFile( const int64_t NfreqData = lineCount / 2 + 1; //FFT the waveform onto a frequency grid - std::vector> fftOfEin(NfreqData, 0.0); + std::vector> fftOfEin(NfreqData, std::complex(0.0)); pocketfft::r2c( - {static_cast(lineCount)}, - {sizeof(float)}, - {sizeof(std::complex)}, - pocketfft::shape_t{0}, pocketfft::FORWARD, - Ein.data(), fftOfEin.data(), - 1.0); + {static_cast(lineCount)}, + {sizeof(double)}, + {sizeof(std::complex)}, + pocketfft::shape_t{0}, pocketfft::FORWARD, + Ein.data(), fftOfEin.data(), + 1.0); //apply a time shift so that the frequency-domain solution //oscillates slowly (will be undone after interpolation)