Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions cuvarbase/lombscargle.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
# import pycuda.autoinit
import skcuda.fft as cufft

from .core import GPUAsyncProcess
from .utils import find_kernel, _module_reader, normalize_light_curves
Expand Down Expand Up @@ -459,34 +460,51 @@ def memory_requirement(self, n0, nf, k0, nbatch=1,

fft_size = H * (nf + k0)

mem = 0

# data
mem += 3 * n0

# final result
mem += nf

# regularization
mem += 2 * H + 1

rsize = self.real_type(1).nbytes
csize = self.complex_type(1).nbytes
c = int(np.ceil(float(csize) / rsize))

if kwargs.get('use_fft', True):
# yw grid / fft (doubled because complex)
mem = c * sigma * (fft_size - k0)
mem += c * sigma * (fft_size - k0)

# work area size for cufft.Plan
# double because large non-power-of-two sizes trigger Bluestein algorithm
nx = sigma * (fft_size - k0)
mem += 1/rsize * 2 * cufft.cufft.cufftEstimate1d(nx, cufft.cufft.CUFFT_C2C)

# w grid / fft (doubled because complex)
mem += c * sigma * (2 * fft_size - k0)

# work area size for cufft.Plan
# double because large non-power-of-two sizes trigger Bluestein algorithm
nx = sigma * (2 * fft_size - k0)
mem += 1/rsize * 2 * cufft.cufft.cufftEstimate1d(nx, cufft.cufft.CUFFT_C2C)

# precomputation (q1 = n0, q2 = n0, q3 = 2m + 1)
mem += 2 * n0 + 2 * m + 1

# inverse of design matrix
if H > 1:

# sparse matrix A (block-diagonal)
mem += (2 * H) ** 2 * nbatch
mem += (2 * H) ** 2

# vector b (Ax = b)
mem += nbatch
mem += 1

mem *= nbatch

# size of float
mem *= rsize
Expand Down