Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
277 changes: 273 additions & 4 deletions popsycle/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
import numpy as np
import h5py
import pylab as plt

from popsycle import utils
from popsycle import synthetic
from scipy.spatial import KDTree
from popsycle import utils, synthetic, phot_utils
import time
import os
import pdb
import warnings
from itertools import zip_longest

filt_dict = phot_utils.make_filt_dict()

def get_star_system_pos_mag(hdf5_file, filt='ubv_I', recalc=True):
"""
Expand Down Expand Up @@ -57,7 +61,7 @@ def get_star_system_pos_mag(hdf5_file, filt='ubv_I', recalc=True):
list_of_df = []

# Loop through the fields and aggregate the stars.
print('Countint stars in patches: ', end="")
print('Counting stars in patches: ', end="")
for k in field_keys:
print(f'{k}, ', end="")
patch = np.array(hf[k])
Expand Down Expand Up @@ -127,3 +131,268 @@ def count_stars_hdf5(hdf5_file, filt='ubv_I', mag_threshold=21):
N_stars = len(gdx)

return N_stars

def count_stars_all(h5_file):
    """
    Finds the number of stars or systems in the field.

    Parameters
    ----------
    h5_file : str
        Filename of an hdf5 file.

    Returns
    -------
    n_stars : int
        Number of stars, summed over all (l, b) bin datasets.
    """
    n_stars = 0
    # Context manager ensures the file handle is closed even on error
    # (the original leaked the open handle).
    with h5py.File(h5_file, 'r') as hf:
        for k in hf.keys():
            # Star-bin datasets start with 'l' and contain no underscore;
            # other keys (e.g. metadata tables) are skipped.
            # NOTE(review): naming convention inferred from usage — confirm.
            if k.startswith('l') and '_' not in k:
                n_stars += hf[k].shape[0]
    return n_stars

def count_stars_all_per_bin(h5_file):
    """
    Finds the number of stars or systems per bin.

    Parameters
    ----------
    h5_file : str
        Filename of an hdf5 file.

    Returns
    -------
    n_stars : list
        Number of stars per bin, in the file's key iteration order.
    """
    n_stars = []
    # Context manager ensures the file handle is closed even on error
    # (the original leaked the open handle).
    with h5py.File(h5_file, 'r') as hf:
        for k in hf.keys():
            # Star-bin datasets start with 'l' and contain no underscore.
            if k.startswith('l') and '_' not in k:
                n_stars.append(hf[k].shape[0])
    return n_stars

def events_for_popclass(h5_file, max_stars_per_bin=3e3):
    """
    Draw microlensing events for random lens, source pairs from a
    PopSyCLE singles-only catalog.

    Parameters
    ----------
    h5_file : str
        Filename of an hdf5 file.

    max_stars_per_bin : float
        Maximum number of stars per bin to use for the
        calculation. Prevents excessive memory/computation use.
        Default is 3e3.

    Returns
    -------
    rem_ids : np.array
        array of lens types
    thetaEs : np.array
        array of Einstein ring radii for events (mas)
    piEs : np.array
        array of microlensing parallaxes for events (unitless)
    tEs : np.array
        array of timescales for events (days)
    weights : np.array
        array of relative weights for events (mu_rel * thetaE)
    """
    rem_ids = []
    thetaEs = []
    piEs = []
    tEs = []
    weights = []

    # Uniform subsampling stride so the most populated bin contributes
    # at most ~max_stars_per_bin stars (the pairwise meshgrid below is
    # O(N^2) in memory and time).
    stars_per_bin = count_stars_all_per_bin(h5_file)
    scale_stars_used = np.maximum(
        1, int(np.floor(np.max(stars_per_bin) / max_stars_per_bin)))

    # Context manager closes the catalog file even if a bin fails.
    with h5py.File(h5_file, 'r') as hf:
        for k in hf.keys():
            # Star-bin datasets start with 'l' and contain no underscore.
            if not (k.startswith('l') and '_' not in k):
                continue
            print('running', k)
            dat = hf[k]
            if dat.shape[0] == 0:
                continue

            patch = dat[::scale_stars_used]
            dists = patch['rad']
            mul = patch['mu_lcosb']
            mub = patch['mu_b']
            masses = patch['mass']
            rem_id_catalog = patch['rem_id']
            idx = np.arange(len(masses))
            del patch  # free the bin's record array before the O(N^2) step

            # All (source, lens) index pairs.
            src_idxs, lens_idxs = np.meshgrid(idx, idx)
            src_idxs, lens_idxs = src_idxs.ravel(), lens_idxs.ravel()

            # Keep only pairs where the source is farther than the lens.
            dist_comp = dists[src_idxs] > dists[lens_idxs]
            use_srcs = src_idxs[dist_comp]
            use_lens = lens_idxs[dist_comp]
            rem_id = rem_id_catalog[use_lens]

            # Microlensing math. 'rad' distances are presumably in kpc and
            # proper motions in mas/yr (TODO confirm against catalog docs);
            # theta_e then comes out in mas.
            pi_rel = (1 / dists[use_lens] - 1 / dists[use_srcs])
            c, G = 299792458, 6.6743e-11                    # SI units
            mSun, pctom = 1.98840987e+30, 3.08567758e+16    # kg, m
            theta_e = (np.sqrt(4 * G * mSun * masses[use_lens] * pi_rel
                               / (1000 * pctom * c ** 2))
                       * 180 / np.pi * 60 ** 2 * 1000)      # rad -> mas
            pi_e = pi_rel / theta_e
            mu_rel = np.sqrt((mul[use_lens] - mul[use_srcs]) ** 2
                             + (mub[use_lens] - mub[use_srcs]) ** 2)
            t_e = theta_e / mu_rel * 365.25  # years -> days
            thetamu = theta_e * mu_rel

            rem_ids.append(rem_id)
            thetaEs.append(theta_e)
            piEs.append(pi_e)
            tEs.append(t_e)
            weights.append(thetamu)

    # Guard the empty catalog: np.concatenate([]) raises ValueError.
    if len(thetaEs) == 0:
        empty = np.array([])
        return empty.copy(), empty.copy(), empty.copy(), empty.copy(), empty.copy()

    print(f'Drew {len(np.concatenate(thetaEs))} events total')
    return (np.concatenate(rem_ids), np.concatenate(thetaEs),
            np.concatenate(piEs), np.concatenate(tEs),
            np.concatenate(weights))


def _calc_blend_clusters(points, radius, star_idxs):
"""
Helper function for calc_blends to run the KDTree clustering
"""
# Set up KDTree and array to note whether each point has been assigned a cluster
tree = KDTree(points)
n_points = len(points)
assigned = np.zeros(n_points, dtype=bool)
clusters = []
prim_idxs = []
for i in range(n_points):
# Find unassigned neighbors and save cluster
if not assigned[i]:
neighbor_indices = tree.query_ball_point(points[i], r=radius)
current_cluster = [idx for idx in neighbor_indices if not assigned[idx]]
assigned[current_cluster] = True
clusters.append(current_cluster)
prim_idxs.append(star_idxs[i])
return clusters, np.array(prim_idxs)

def _calc_blends_bin(star_dat, blend_rad, filters,
                     primary_filter=None, ext_law='Damineli16'):
    """
    Helper function for calc_blends to process 1 bin.

    Converts the bin's absolute magnitudes to apparent magnitudes,
    clusters stars within `blend_rad` (degrees), then returns a dict of
    per-blend summed magnitudes (one key per filter), primary-filter
    flux-weighted astrometric parameters ('glon', 'glat', 'mu_lcosb',
    'mu_b', 'plx', 'exbv'), and the 'obj_id' of each blend's brightest
    star.

    Parameters
    ----------
    star_dat : pandas.DataFrame
        One bin's star table; must contain 'rad', 'exbv', 'glon', 'glat',
        'mu_lcosb', 'mu_b', 'obj_id' and each column in `filters`.
    blend_rad : float
        Blend radius in degrees.
    filters : list of str
        Photometry columns to blend.
    primary_filter : str = None
        Filter used for sorting and flux-weighting; defaults to filters[0].
    ext_law : str = 'Damineli16'
        Extinction law key into filt_dict.
    """
    # Clean up table and sort by magnitude
    if primary_filter is None:
        warnings.warn(f"No primary_filter provided. Using {filters[0]}"+
                      " to sort stars and flux-weight additional parameters.")
        primary_filter = filters[0]
    # Convert each filter column from absolute to apparent magnitude in place.
    for filt in filters:
        star_dat.loc[:,filt] = synthetic.calc_app_mag(star_dat['rad'].to_numpy(),
                star_dat[filt].to_numpy(), star_dat['exbv'].to_numpy(),
                filt_dict[filt][ext_law])
    # Drop stars with no primary-filter magnitude, then sort brightest-first
    # so each cluster's seed (first unassigned point) is its brightest star.
    star_dat = star_dat[~np.isnan(star_dat[primary_filter])].copy()
    star_dat.sort_values(by=primary_filter, inplace=True)
    star_dat.reset_index(drop=True,inplace=True)

    # Get coordinates into simplified delta_l_cosb and delta_b frame
    # NOTE(review): l_mean is taken before the >180 wrap correction is
    # applied to all_l — for bins straddling l = 0/360 the offsets may be
    # inconsistent. Confirm bins never straddle the wrap.
    l_mean = np.mean(star_dat['glon']); b_mean = np.mean(star_dat['glat'])
    all_l, all_b = star_dat['glon'].to_numpy(), star_dat['glat'].to_numpy()
    delta_l_cosb = ((all_l-360*(all_l>180))-l_mean)*np.cos(all_b*np.pi/180)
    delta_b = all_b-b_mean
    all_pts = np.transpose([delta_l_cosb, delta_b])
    star_idxs = star_dat['obj_id'].to_numpy()

    # Set up arrays with necessary data
    mags = star_dat[filters].to_numpy()
    mags_prim = star_dat[primary_filter].to_numpy()
    # Parallax from distance; assumes 'rad' is in kpc so plx is in mas —
    # TODO confirm units.
    star_dat.loc[:,'plx'] = 1/star_dat['rad']
    astrom_vals = star_dat[['glon','glat','mu_lcosb',
                            'mu_b','plx','exbv']].to_numpy()

    # Run the cluster calculation on the sorted/cleaned points
    clusters0, prim_idxs = _calc_blend_clusters(all_pts, blend_rad, star_idxs)
    # Turn this into a masked array for easy mag + param sums.
    # zip_longest pads ragged clusters with -1; masked_values hides the pads.
    clusters_arr = np.array(list(zip_longest(*clusters0, fillvalue=-1))).T
    clusters = np.ma.masked_values(clusters_arr, -1)

    # Compute the magnitude sums & weighted astrometry parameters.
    # A sentinel row is appended to each data array so that any -1 pad index
    # that leaks through fancy indexing lands on a harmless row: inf mag
    # (zero flux) for photometry, zeros for astrometry.
    mags = np.append(mags, [np.repeat(np.inf, len(filters))], axis=0)
    bmags = -2.5*np.log10(np.sum(10**(-0.4*mags[clusters,:]),axis=1))
    out = {}
    for i,f in enumerate(filters):
        out[f] = bmags[:,i]
    mags_prim = np.append(mags_prim, [np.inf])
    astrom_vals = np.append(astrom_vals, [np.repeat(0, 6)], axis=0)
    # Flux-weighted mean of each astrometric column over cluster members.
    fluxes = 10**(-0.4*mags_prim[clusters])
    bvals = (np.sum(fluxes.T*astrom_vals[clusters,:].T, axis=1)
             / np.sum(fluxes,axis=1)).T
    for i, col in enumerate(['glon','glat','mu_lcosb',
                             'mu_b','plx', 'exbv']):
        out[col] = bvals[:,i]
    out['obj_id'] = prim_idxs
    return out

def calc_blends(hdf5_file, blend_rad, filters,
                primary_filter=None, ext_law='Damineli16',
                recalc=False, combine_bins=False):
    """
    Artificially blend the catalog based on a given radius &
    save the blend catalog to a file. Outputs are: summed magnitudes
    in selected filters, primary filter flux-weighted positions,
    proper motions, parallaxes, and reddening, and the obj_id in
    the original table of the brightest star in the blend.

    Parameters
    ----------
    hdf5_file : str
        Filename of an hdf5 file.
    blend_rad : float
        blend radius in degrees
    filters : list of str
        column headers for photometry to blend
    primary_filter : str = None
        filter to use for astrometric parameter flux-weighting.
        if None, use the first filter in the list.
    ext_law : str = 'Damineli16'
        extinction law
    recalc : boolean = False
        if True, recalculate and replace existing file
    combine_bins : boolean = False
        if True, save a single dataset instead of binned data
    """
    outfile_name = hdf5_file.replace('.h5', '_blended_catalog.h5')

    if os.path.exists(outfile_name) and not recalc:
        print(f'{outfile_name} exists. Set recalc=True to recalculate.')
        return

    # Context managers guarantee both files are closed even if a bin fails
    # partway through (the original leaked handles on exceptions).
    with h5py.File(hdf5_file, 'r') as hf, \
         h5py.File(outfile_name, 'a') as outfile:
        for dset_name in hf.keys():
            # Star-bin datasets start with 'l' and contain no underscore.
            if not (dset_name.startswith('l') and '_' not in dset_name):
                continue
            print(dset_name)
            blend_bin = _calc_blends_bin(pd.DataFrame(hf[dset_name][:]),
                                         blend_rad, filters,
                                         primary_filter=primary_filter,
                                         ext_law=ext_law)
            compound_dtype = synthetic._generate_compound_dtype(blend_bin)
            save_data = np.empty(len(blend_bin['plx']), dtype=compound_dtype)
            for colname in blend_bin:
                save_data[colname] = blend_bin[colname]

            if combine_bins:
                # Append every bin to one growing 'data' dataset.
                if 'data' not in outfile:
                    # Chunk shape must be integral (was float 1e4).
                    dataset = outfile.create_dataset('data', shape=(0,),
                                                     chunks=(10000,),
                                                     maxshape=(None,),
                                                     dtype=compound_dtype)
                else:
                    dataset = outfile['data']
                old_size = dataset.shape[0]
            else:
                # One dataset per bin; an existing dataset is rewritten
                # from index 0 (old_size = 0).
                if dset_name not in outfile:
                    dataset = outfile.create_dataset(dset_name, shape=(0,),
                                                     chunks=(10000,),
                                                     maxshape=(None,),
                                                     dtype=compound_dtype)
                else:
                    dataset = outfile[dset_name]
                old_size = 0
            new_size = old_size + len(blend_bin['plx'])
            dataset.resize((new_size, ))
            dataset[old_size:new_size] = save_data
6 changes: 3 additions & 3 deletions popsycle/synthetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6406,11 +6406,11 @@ def calc_app_mag(r, M, E, f):

Parameters
----------
M : float or array
Absolute magnitude of star

r : float or array
Distance of star from sun (in kpc)

M : float or array
Absolute magnitude of star

E : float or array
Extinction law
Expand Down