Barak 0.3.2 documentation

Source code for barak.utilities

""" Various general-use functions."""
from textwrap import wrap
import sys, os
import numpy as np
from math import sqrt

class Bunch(object):
    """Bunch class from the python cookbook with __str__ and __repr__
    methods.

    Examples
    --------
    >>> s = Bunch(a=1, b=2, c=['bar', 99])
    >>> s.a
    1
    >>> s.c
    ['bar', 99]
    >>> s
    Bunch(a, b, c)
    """
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def __repr__(self):
        temp = ', '.join(sorted(str(attr) for attr in self.__dict__
                                if not str(attr).startswith('_')))
        return 'Bunch(%s)' % '\n '.join(wrap(temp, width=69))

class adict(dict):
    """ A dictionary with attribute-style access. It maps attribute
    access to the real dictionary.
    """
    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)

    # the following two methods allow pickling
    def __getstate__(self):
        """ Prepare a state for pickling. """
        return self.__dict__.items()

    def __setstate__(self, items):
        """ Unpickle. """
        for key, val in items:
            self.__dict__[key] = val

    def __setitem__(self, key, value):
        return super(adict, self).__setitem__(key, value)

    def __getitem__(self, name):
        return super(adict, self).__getitem__(name)

    def __delitem__(self, name):
        return super(adict, self).__delitem__(name)

    def __setattr__(self, key, value):
        if hasattr(self, key):
            # make sure existing methods are not overwritten by new
            # keys.
            return super(adict, self).__setattr__(key, value)
        else:
            return super(adict, self).__setitem__(key, value)

    __getattr__ = __getitem__

    def copy(self):
        return adict(self)

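# Illustrative sketch of adict (not part of the class above): keys
# set via attribute access or item access are visible both ways.
#
# >>> d = adict(a=1)
# >>> d.b = 2
# >>> d['b']
# 2
# >>> d.a
# 1
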
def nan2num(a, replace=0):
    """ Replace `nan` or `inf` entries with the `replace` keyword
    value.

    If replace is "mean", use the mean of the array to replace
    values. If it's "interp", interpolate from the nearest values.
    """
    a = np.atleast_1d(a)
    b = np.array(a, copy=True)
    bad = np.isnan(b) | np.isinf(b)
    if replace == 'mean' and (~bad).sum() > 0:
        replace = b[~bad].mean().astype(b.dtype)
    elif replace == 'interp':
        x = np.arange(len(a))
        replace = np.interp(x[bad], x[~bad], b[~bad]).astype(b.dtype)
    b[bad] = replace
    if len(b) == 1:
        return b[0]
    return b

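# Illustrative sketch of nan2num: bad entries are replaced by the
# keyword value, or interpolated from their neighbours.
#
# >>> nan2num(np.array([1.0, np.nan, 3.0]))
# array([ 1.,  0.,  3.])
# >>> nan2num(np.array([1.0, np.nan, 3.0]), replace='interp')
# array([ 1.,  2.,  3.])
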
def indgroupby(a, name):
    """ Find the indices giving rows in `a` that have common values
    for the field `name`.

    Parameters
    ----------
    a : structured array
      Find the indices for rows in this array.
    name : str
      Field of `a`.

    Returns
    -------
    unique_vals, indices :
      Unique sorted values of `a[name]`, and a list of indices into
      `a` giving the rows with each unique value.
    """
    b = a[name]
    isort = b.argsort()
    # find indices in the sorted array where we change to a new value
    # of name.
    temp = np.flatnonzero(b[isort][:-1] != b[isort][1:])
    jbreaks = np.concatenate([[0], temp + 1, [len(b)]])
    # make a list of indices into a for each unique value
    for j0, j1 in zip(jbreaks[:-1], jbreaks[1:]):
        ind = isort[j0:j1].view(np.ndarray)
        # yield the unique value, and the indices into `a` giving
        # rows with that value
        yield b[ind[0]], ind

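# Illustrative sketch of indgroupby on a small structured array (the
# field names here are arbitrary):
#
# >>> a = np.rec.fromrecords([(1, 4.0), (2, 5.0), (1, 6.0)],
# ...                        names='id,val')
# >>> [(v, list(ind)) for v, ind in indgroupby(a, 'id')]
# [(1, [0, 2]), (2, [1])]
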
def between(a, vmin, vmax):
    """ Return a boolean array True where vmin <= a < vmax.

    Notes
    -----
    Careful of floating point issues when dealing with equalities.
    """
    a = np.asarray(a)
    c = a < vmax
    c &= a >= vmin
    return c

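# Illustrative sketch of between: the lower limit is inclusive, the
# upper limit exclusive.
#
# >>> between([1, 2, 3, 4], 2, 4)
# array([False,  True,  True, False], dtype=bool)
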
def poisson_noise(nflux, nsigma, seed=None):
    """ Adds poisson noise to a normalised flux array.

    Parameters
    ----------
    nsigma : float
      One sigma error in the flux at the continuum level (where
      normalised flux = 1).
    nflux : array of floats, shape (N,)
      Normalised flux values (i.e. flux values divided by the
      continuum).
    seed : int, optional
      If given, this is used to seed the random number generator.

    Returns
    -------
    flux, error : arrays of floats, shape (N,)
      Flux with noise added and one sigma error array.
    """
    if seed is not None:
        np.random.seed(seed)
    nflux = np.asarray(nflux)
    nsigma = float(nsigma)
    if (nflux < 0).any():
        raise Exception('Flux values must be >= 0!')
    lamb = nflux / (nsigma * nsigma)    # variance per pixel
    fout = np.random.poisson(lamb)
    fout = np.where(lamb > 0, fout / lamb * nflux, 0)
    sigout = np.where(lamb > 0, nflux / np.sqrt(lamb), 0)
    return fout, sigout

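# Illustrative sketch of poisson_noise: at the continuum (nflux=1)
# the one sigma error equals nsigma.
#
# >>> fl, er = poisson_noise(np.ones(5), 0.1, seed=1)
# >>> er
# array([ 0.1,  0.1,  0.1,  0.1,  0.1])
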
def addnoise(nflux, nsigma, minsig=None, seed=None):
    """ Add noise to a normalised flux array.

    Either gaussian, or a combination of poisson and gaussian noise
    is added to the flux array, depending on the keyword `minsig`.

    Parameters
    ----------
    nflux : array of floats, shape (N,)
      Array of normalised flux values.
    nsigma : float
      Total desired noise at the continuum (flux=1). Note the
      signal-to-noise ratio (SNR) = 1 / nsigma.
    minsig : float, optional
      By default minsig is `None`, which means gaussian noise with
      standard deviation `nsigma` is added to the flux. If minsig is
      given, a combination of poisson and gaussian noise is added to
      the flux to give an error of `nsigma` at the continuum. In
      this case the gaussian noise component has st. dev. of
      `minsig`, which must be less than `nsigma`.
    seed : int, optional
      If seed is given, it is used to seed the random number
      generator. By default the seed is not reset.

    Returns
    -------
    flux, error : arrays of floats, shape (N,)
      Normalised flux with noise added, one sigma error array.
    """
    nsigma = abs(float(nsigma))
    if seed is not None:
        np.random.seed(seed)
    if minsig is None:
        nflux = np.asarray(nflux)
        dev = nsigma * np.random.randn(len(nflux))
        er = np.empty_like(nflux)
        er.fill(nsigma)
        return nflux + dev, er
    else:
        minsig = abs(float(minsig))
        if minsig > nsigma:
            raise Exception('Noise at continuum must be bigger than minsig!')
        # gaussian variance
        var_g = minsig * minsig
        # normalised sigma of poisson noise at the continuum
        sig_p_cont = np.sqrt(nsigma * nsigma - var_g)
        flnew, sig_p = poisson_noise(nflux, sig_p_cont, seed=seed)
        # gaussian error
        flnew += minsig * np.random.randn(len(nflux))
        # total sigma
        er = np.sqrt(sig_p * sig_p + var_g)
        return flnew, er

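# Illustrative sketch of addnoise: pure gaussian noise by default, or
# a poisson + gaussian combination when minsig is given. Either way
# the error at the continuum is nsigma.
#
# >>> nflux = np.ones(100)
# >>> fl, er = addnoise(nflux, 0.2, seed=2)
# >>> fl2, er2 = addnoise(nflux, 0.2, minsig=0.05, seed=2)
# >>> er[0], er2[0]     # both are 0.2 at the continuum
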
def wmean(val, sig):
    """ Return the weighted mean and error.

    Uses inverse variances as weights.

    Parameters
    ----------
    val : array with shape (N,)
      Array of values.
    sig : array with shape (N,)
      One sigma errors (``sqrt(variance)``) of the array values.

    Returns
    -------
    wmean, wsigma : floats
      The weighted mean and error on the weighted mean.
    """
    val = np.asarray(val)
    sig = np.asarray(sig)
    # remove any values with bad errors
    condition = (sig > 0.) & ~np.isnan(val) & ~np.isnan(sig)
    if not condition.any():
        raise ValueError('No good values!')
    val = val[condition]
    sig = sig[condition]
    # normalisation
    inverse_variance = 1. / (sig * sig)
    norm = np.sum(inverse_variance)
    wmean = np.sum(inverse_variance * val) / norm
    wsigma = 1. / np.sqrt(norm)
    return wmean, wsigma

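# Illustrative sketch of wmean: with equal errors this reduces to the
# plain mean, and the error on the mean scales as 1/sqrt(N).
#
# >>> wmean([1., 2.], [0.1, 0.1])    # gives (1.5, ~0.0707)
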
def indexnear(ar, val):
    """ Find the element in an array closest to a given value.

    The input array must be sorted lowest to highest. Returns the
    index of the element with a value closest to the given value.

    Parameters
    ----------
    ar : array_like
      Input array. It must be sorted smallest to largest.
    val : float
      Find the element of `ar` that is closest to `val`.

    Returns
    -------
    index : int
      Index of the `ar` element with the closest value to `val`.

    Examples
    --------
    >>> wa = np.linspace(4000, 4500, 100)
    >>> i = indexnear(wa, 4302.5)
    >>> print i, wa[i]
    60 4303.03030303
    >>> i = indexnear(wa, 4600.0)
    >>> print i, wa[i]
    99 4500.0
    >>> i = indexnear(wa, 3000.0)
    >>> print i, wa[i]
    0 4000.0
    """
    ar = np.asarray(ar)
    i = ar.searchsorted(val)
    # the following is needed because searchsorted rounds up
    if i == 0:
        return i
    # note if i == len(ar) then ar[i] is invalid, but won't get tested.
    elif i == len(ar) or val - ar[i-1] < ar[i] - val:
        return i - 1
    else:
        return i

def calc_Mstar_b(z):
    """ Find M* at a given redshift.

    Find the Schechter parameter M* in the rest frame B band at
    redshift z, by interpolating over the Faber et al. 2007 DEEP2
    averaged values, and assuming M*_B = -20.0 at z=0 (rough average
    of the z=0.07-0.1 points in Faber et al. 2007).
    """
    assert z < 1.5
    zvals = 0.0, 0.3, 0.5, 0.7, 0.9, 1.1
    Mvals = -20.00, -21.07, -21.15, -21.51, -21.36, -21.54
    return np.interp(z, zvals, Mvals)

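# Illustrative sketch of calc_Mstar_b: grid redshifts return the
# tabulated values, intermediate redshifts are linearly interpolated.
#
# >>> calc_Mstar_b(0.5)    # -21.15, a tabulated value
# >>> calc_Mstar_b(0.4)    # ~ -21.11, halfway between -21.07 and -21.15
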
def combinations(items, n):
    """ A generator of combinations from `items`.

    This returns a generator for the number of ways you can take n
    items (order unimportant) from a list of items.
    """
    if n == 0:
        yield []
    else:
        for i in xrange(len(items)):
            for c in combinations(items[i+1:], n-1):
                yield [items[i]] + c

def permutations(items):
    """ Generator for permutations from `items`.

    Analogous to `combinations`, but order is important.
    """
    # note combinations(items, len(items)) yields only the single
    # increasing-index selection, so permutations need their own
    # recursion.
    if len(items) == 0:
        yield []
    else:
        for i in xrange(len(items)):
            for p in permutations(items[:i] + items[i+1:]):
                yield [items[i]] + p

def find_edges_true_regions(condition):
    """ Find the indices for the edges of contiguous regions where
    `condition` is True.

    Examples
    --------
    >>> a = np.array([3,0,1,4,6,7,8,6,3,2,0,3,4,5,6,4,2,0,2,5,0,3])
    >>> ileft, iright = find_edges_true_regions(a > 2)
    >>> zip(ileft, iright)
    [(0, 0), (3, 8), (11, 15), (19, 19), (21, 21)]
    """
    indices, = condition.nonzero()
    if not len(indices):
        return None, None
    iright, = (indices[1:] - indices[:-1] > 1).nonzero()
    ileft = iright + 1
    iright = np.concatenate((iright, [len(indices) - 1]))
    ileft = np.concatenate(([0], ileft))
    return indices[ileft], indices[iright]

def stats(arr):
    """ Show the minimum, maximum, median, mean, shape and size of an
    array.

    Also show the number of NaN entries (if any).
    """
    arr = np.asarray(arr)
    shape = arr.shape
    arr = arr.ravel()
    size = len(arr)
    bad = np.isnan(arr)
    nbad = bad.sum()
    if nbad == size:
        return '#NaN %i of %i' % (nbad, size)
    elif nbad == 0:
        arr = np.sort(arr)
    else:
        arr = np.sort(arr[~bad])
    if len(arr) % 2 == 0:
        i = len(arr) // 2
        median = 0.5 * (arr[i-1] + arr[i])
    else:
        median = arr[len(arr) // 2]
    return 'min %.5g max %.5g median %.5g mean %.5g shape %s #NaN %i of %i' % (
        arr[0], arr[-1], median, arr.mean(), shape, nbad, size)

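# Illustrative sketch of stats:
#
# >>> stats([1., 2., 3., np.nan])
# 'min 1 max 3 median 2 mean 2 shape (4,) #NaN 1 of 4'
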
def Gaussian(x, x0, sigma, height):
    """ Gaussian."""
    return height * np.exp(-0.5 * ((x - x0) / sigma)**2)

def meshgrid_nd(*arrs):
    """ Like numpy's meshgrid, but works on more than two dimensions.
    """
    arrs = tuple(reversed(arrs))
    lens = map(len, arrs)
    dim = len(arrs)
    sz = 1
    for s in lens:
        sz *= s
    ans = []
    for i, arr in enumerate(arrs):
        slc = [1] * dim
        slc[i] = lens[i]
        arr2 = np.asarray(arr).reshape(slc)
        for j, sz in enumerate(lens):
            if j != i:
                arr2 = arr2.repeat(sz, axis=j)
        ans.append(arr2)
    return tuple(ans[::-1])

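# Illustrative sketch of meshgrid_nd: for inputs (x, y, z) each
# output has shape (len(z), len(y), len(x)), analogous to numpy's
# meshgrid in two dimensions.
#
# >>> x, y, z = meshgrid_nd([1, 2], [3, 4, 5], [6, 7])
# >>> x.shape, y.shape, z.shape
# ((2, 3, 2), (2, 3, 2), (2, 3, 2))
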
def autocorr(x, maxlag=300):
    """ Find the autocorrelation of x.

    The mean is subtracted from x before correlating. Correlation
    values are calculated in offset steps from 0 up to maxlag - 1.
    """
    dot = np.dot
    maxlag = min(maxlag, len(x) - 1)
    x = np.asanyarray(x)
    x = x - x.mean()
    a = [1]
    for k in xrange(1, maxlag):
        v1 = dot(x[:-k], x[k:])
        v2 = dot(x[:-k], x[:-k])
        v3 = dot(x[k:], x[k:])
        a.append(v1 / sqrt(v2 * v3))
    return a

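# Illustrative sketch of autocorr: the lag-0 value is always 1, and a
# sine wave is anti-correlated with itself at half its period.
#
# >>> t = np.linspace(0, 10 * 2 * np.pi, 1000)   # ~100 samples per period
# >>> a = autocorr(np.sin(t), maxlag=100)
# >>> a[0]
# 1
# >>> a[50] < 0    # half a period later
# True
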
def get_data_path():
    """ Return the path to the data directory for this package. """
    return os.path.abspath(__file__).rsplit('/', 1)[0] + '/data/'

def indices_from_grid(c, ref):
    """ Convert coordinates to indices defined by a grid of reference
    values.

    Parameters
    ----------
    c : array of floats, shape (M,)
      Coordinates.
    ref : array of floats, shape (N,)
      Reference grid coordinates. They must be equally spaced.

    Returns
    -------
    ind : array of floats
      Coordinates mapped onto the indices of the reference grid.
    """
    ref = np.sort(ref)
    dref = ref[1:] - ref[:-1]
    dref0 = float(dref[0])
    assert np.allclose(dref0, dref[1:])
    c = np.sort(c)
    assert c[0] >= ref[0] and c[-1] <= ref[-1]
    ind = (c - ref[0]) / dref0
    return ind
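# Illustrative sketch of indices_from_grid with a grid spacing of 0.5:
#
# >>> ref = np.arange(10., 20., 0.5)
# >>> indices_from_grid(np.array([10., 11., 12.5]), ref)
# array([ 0.,  2.,  5.])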
