diff --git a/popsycle/analysis.py b/popsycle/analysis.py index ac3871f..2d4d695 100644 --- a/popsycle/analysis.py +++ b/popsycle/analysis.py @@ -2,11 +2,15 @@ import numpy as np import h5py import pylab as plt - -from popsycle import utils -from popsycle import synthetic +from scipy.spatial import KDTree +from popsycle import utils, synthetic, phot_utils import time import os +import pdb +import warnings +from itertools import zip_longest + +filt_dict = phot_utils.make_filt_dict() def get_star_system_pos_mag(hdf5_file, filt='ubv_I', recalc=True): """ @@ -57,7 +61,7 @@ def get_star_system_pos_mag(hdf5_file, filt='ubv_I', recalc=True): list_of_df = [] # Loop through the fields and aggregate the stars. - print('Countint stars in patches: ', end="") + print('Counting stars in patches: ', end="") for k in field_keys: print(f'{k}, ', end="") patch = np.array(hf[k]) @@ -127,3 +131,268 @@ def count_stars_hdf5(hdf5_file, filt='ubv_I', mag_threshold=21): N_stars = len(gdx) return N_stars + +def count_stars_all(h5_file): + """ + Finds the number of stars or systems in the field. + + Parameters + ---------- + h5_file : str + Filename of an hdf5 file. + + Returns + ------- + n_stars : int + Number of stars. + """ + hf = h5py.File(h5_file, 'r') + n_stars = 0 + for k in list(hf.keys()): + if (k.startswith('l')) & ('_' not in k): + n_stars += hf[k].shape[0] + return n_stars + +def count_stars_all_per_bin(h5_file): + """ + Finds the number of stars or systems per bin. + + Parameters + ---------- + h5_file : str + Filename of an hdf5 file. + + Returns + ------- + n_stars : list + Number of stars per bin. + """ + hf = h5py.File(h5_file, 'r') + n_stars = [] + for k in list(hf.keys()): + if (k.startswith('l')) & ('_' not in k): + n_stars.append(hf[k].shape[0]) + return n_stars + +def events_for_popclass(h5_file, max_stars_per_bin=3e3): + """ + Draw microlensing events for random lens, source pairs from a + PopSyCLE singles-only catalog. 
+ + Parameters + ---------- + h5_file : str + Filename of an hdf5 file. + + max_stars_per_bin : float + Maximum number of stars per bin to use for the + calculation. Prevents excessive memory/computation use. + Default is 3e3. + + Returns + ------- + rem_ids : np.array + array of lens types + thetaEs : np.array + array of Einstein ring radii for events (mas) + piEs : np.array + array of microlensing parallaxes for events (unitless) + tEs : np.array + array of timescales for events (days) + weights : np.array + array of relative weights for events (mu_rel * thetaE) + """ + hf = h5py.File(h5_file, 'r') + rem_ids = [] + thetaEs = [] + piEs = [] + tEs = [] + weights = [] + stars_per_bin = count_stars_all_per_bin(h5_file) + scale_stars_used = np.maximum(1,int(np.floor(np.max(stars_per_bin)/max_stars_per_bin))) + for k in list(hf.keys()): + if (k.startswith('l')) & ('_' not in k): + print('running', k) + dat = hf[k] + + if dat.shape[0] > 0: + patch = dat[::scale_stars_used] + dists = patch['rad'] + mul = patch['mu_lcosb'] + mub = patch['mu_b'] + masses = patch['mass'] + rem_id_catalog = patch['rem_id'] + idx = np.arange(len(masses)) + del patch + + src_idxs, lens_idxs = np.meshgrid(idx, idx) + src_idxs, lens_idxs = src_idxs.ravel(), lens_idxs.ravel() + + dist_comp = (dists[src_idxs] > dists[lens_idxs]) #source further than lens + use_srcs = src_idxs[dist_comp] + use_lens = lens_idxs[dist_comp] + rem_id = rem_id_catalog[use_lens] + + # Microlensing math + pi_rel = (1/dists[use_lens] - 1/dists[use_srcs]) + c, G, mSun, pctom = 299792458, 6.6743e-11, 1.98840987e+30, 3.08567758e+16 + theta_e = np.sqrt(4*G*mSun*masses[use_lens]*pi_rel/(1000*pctom*c**2)) * 180/np.pi * 60**2 * 1000 + pi_e = pi_rel / theta_e + mu_rel = np.sqrt((mul[use_lens]-mul[use_srcs])**2 + (mub[use_lens]-mub[use_srcs])**2) + t_e = theta_e/mu_rel * 365.25 # years -> days + thetamu = theta_e*mu_rel + rem_ids.append(rem_id) + thetaEs.append(theta_e) + piEs.append(pi_e) + tEs.append(t_e) + 
weights.append(thetamu) + print(f'Drew {len(np.concatenate(thetaEs))} events total') + return np.concatenate(rem_ids), np.concatenate(thetaEs), np.concatenate(piEs), np.concatenate(tEs), np.concatenate(weights) + + +def _calc_blend_clusters(points, radius, star_idxs): + """ + Helper function for calc_blends to run the KDTree clustering + """ + # Set up KDTree and array to note whether each point has been assigned a cluster + tree = KDTree(points) + n_points = len(points) + assigned = np.zeros(n_points, dtype=bool) + clusters = [] + prim_idxs = [] + for i in range(n_points): + # Find unassigned neighbors and save cluster + if not assigned[i]: + neighbor_indices = tree.query_ball_point(points[i], r=radius) + current_cluster = [idx for idx in neighbor_indices if not assigned[idx]] + assigned[current_cluster] = True + clusters.append(current_cluster) + prim_idxs.append(star_idxs[i]) + return clusters, np.array(prim_idxs) + +def _calc_blends_bin(star_dat, blend_rad, filters, + primary_filter=None, ext_law='Damineli16'): + """ + Helper function for calc_blends to process 1 bin + """ + # Clean up table and sort by magnitude + if primary_filter is None: + warnings.warn(f"No primary_filter provided. 
Using {filters[0]}"+ + " to sort stars and flux-weight additional parameters.") + primary_filter = filters[0] + for filt in filters: + star_dat.loc[:,filt] = synthetic.calc_app_mag(star_dat['rad'].to_numpy(), + star_dat[filt].to_numpy(), star_dat['exbv'].to_numpy(), + filt_dict[filt][ext_law]) + star_dat = star_dat[~np.isnan(star_dat[primary_filter])].copy() + star_dat.sort_values(by=primary_filter, inplace=True) + star_dat.reset_index(drop=True,inplace=True) + + # Get coordinates into simplified delta_l_cosb and delta_b frame + l_mean = np.mean(star_dat['glon']); b_mean = np.mean(star_dat['glat']) + all_l, all_b = star_dat['glon'].to_numpy(), star_dat['glat'].to_numpy() + delta_l_cosb = ((all_l-360*(all_l>180))-l_mean)*np.cos(all_b*np.pi/180) + delta_b = all_b-b_mean + all_pts = np.transpose([delta_l_cosb, delta_b]) + star_idxs = star_dat['obj_id'].to_numpy() + + # Set up arrays with necessary data + mags = star_dat[filters].to_numpy() + mags_prim = star_dat[primary_filter].to_numpy() + star_dat.loc[:,'plx'] = 1/star_dat['rad'] + astrom_vals = star_dat[['glon','glat','mu_lcosb', + 'mu_b','plx','exbv']].to_numpy() + + # Run the cluster calculation on the sorted/cleaned points + clusters0, prim_idxs = _calc_blend_clusters(all_pts, blend_rad, star_idxs) + # Turn this into a masked array for easy mag + param sums + clusters_arr = np.array(list(zip_longest(*clusters0, fillvalue=-1))).T + clusters = np.ma.masked_values(clusters_arr, -1) + + # Compute the magnitude sums & weighted astrometry parameters + mags = np.append(mags, [np.repeat(np.inf, len(filters))], axis=0) + bmags = -2.5*np.log10(np.sum(10**(-0.4*mags[clusters,:]),axis=1)) + out = {} + for i,f in enumerate(filters): + out[f] = bmags[:,i] + mags_prim = np.append(mags_prim, [np.inf]) + astrom_vals = np.append(astrom_vals, [np.repeat(0, 6)], axis=0) + fluxes = 10**(-0.4*mags_prim[clusters]) + bvals = (np.sum(fluxes.T*astrom_vals[clusters,:].T, axis=1) + / np.sum(fluxes,axis=1)).T + for i, col in 
enumerate(['glon','glat','mu_lcosb', + 'mu_b','plx', 'exbv']): + out[col] = bvals[:,i] + out['obj_id'] = prim_idxs + return out + +def calc_blends(hdf5_file, blend_rad, filters, + primary_filter=None, ext_law='Damineli16', + recalc=False, combine_bins=False): + """ + Artificially blend the catalog based on a given radius & + save the blend catalog to a file. Outputs are: summed magnitudes + in selected filters, primary filter flux-weighted positions, + proper motions, parallaxes, and reddening, and the obj_id in + the original table of the brightest star in the blend. + + Parameters + ---------- + hdf5_file : str + Filename of an hdf5 file. + blend_rad : float + blend radius in degrees + filters : list of str + column headers for photometry to blend + primary_filter : str = None + filter to use for astrometric parameter flux-weighting. + if None, use the first filter in the list. + ext_law : str = 'Damineli16' + extinction law + recalc : boolean = False + if True, recalculate and replace existing file + combine_bins : boolean = False + if True, save a single dataset instead of binned data + """ + outfile_name = hdf5_file.replace('.h5', '_blended_catalog.h5') + + if os.path.exists(outfile_name) and recalc == False: + print(f'{outfile_name} exists. 
Set recalc=True to recalculate.') + return + + hf = h5py.File(hdf5_file, 'r') + outfile = h5py.File(outfile_name, 'a') + + for dset_name in list(hf.keys()): + if (dset_name.startswith('l')) & ('_' not in dset_name): + print(dset_name) + blend_bin = _calc_blends_bin(pd.DataFrame(hf[dset_name][:]), + blend_rad, filters, + primary_filter=primary_filter, ext_law=ext_law) + compound_dtype = synthetic._generate_compound_dtype(blend_bin) + save_data = np.empty(len(blend_bin['plx']), dtype=compound_dtype) + for colname in blend_bin: + save_data[colname] = blend_bin[colname] + if combine_bins: + if 'data' not in outfile: + dataset = outfile.create_dataset('data', shape=(0,), + chunks=(1e4,), + maxshape=(None,), + dtype=compound_dtype) + else: + dataset = outfile['data'] + old_size = dataset.shape[0] + else: + if dset_name not in outfile: + dataset = outfile.create_dataset(dset_name, shape=(0,), + chunks=(1e4,), + maxshape=(None,), + dtype=compound_dtype) + else: + dataset = outfile[dset_name] + old_size = 0 + new_size = old_size + len(blend_bin['plx']) + dataset.resize((new_size, )) + dataset[old_size:new_size] = save_data + hf.close() + outfile.close() diff --git a/popsycle/synthetic.py b/popsycle/synthetic.py index 2bcd0fa..2862ea1 100755 --- a/popsycle/synthetic.py +++ b/popsycle/synthetic.py @@ -6406,11 +6406,11 @@ def calc_app_mag(r, M, E, f): Parameters ---------- - M : float or array - Absolute magnitude of star - r : float or array Distance of star from sun (in kpc) + + M : float or array + Absolute magnitude of star E : float or array Extinction law