""" Contains an object to describe a spectrum, and various
spectrum-related functions."""
from __future__ import division
from itertools import izip
import copy
import os, pdb
from math import sqrt
from pprint import pformat
import numpy as np
import matplotlib.pyplot as pl
from utilities import nan2num, between, get_data_path
from convolve import convolve_psf
from io import readtxt, readtabfits
from plot import axvlines, axvfill, puttext
from constants import c_kms
DATAPATH = get_data_path()
debug = False
def getwave(hd):
    """ Given a fits header, return the wavelength scale.

    The scale is derived from the CRVAL1, CRPIX1 and CDELT1 (or CD1_1)
    header cards.  If CRVAL1 < 10 the wavelength values are assumed to
    be log10(Angstroms), i.e. a log-linear (constant velocity) scale.
    """
    dv = None
    # pixel stepsize: CDELT1 is the traditional card, CD1_1 the WCS one
    if 'CDELT1' in hd:
        dw = hd['CDELT1']
    else:
        dw = hd['CD1_1']
    CRVAL = hd['CRVAL1']
    CRPIX = hd['CRPIX1']
    # wavelength of pixel 1
    wstart = CRVAL + (1 - CRPIX) * dw
    # check if it's log-linear scale (heuristic)
    if CRVAL < 10:
        # dv / c_kms = 1 - 10**(-dw)  (see the Spectrum class notes).
        # Keep wstart in log10 units: make_wa_scale() does the
        # exponentiation itself when constantdv is true.
        dv = c_kms * (1. - 10. ** -dw)
        print('constant dv = %.3f km/s (assume CRVAL1 in log(Angstroms))' % dv)
    npts = hd['NAXIS1']
    return make_wa_scale(wstart, dw, npts, constantdv=dv)
def find_wa_edges(wa):
    """ Given wavelength bin centres, find the edges of wavelengh
    bins.

    The interior edges are the midpoints between adjacent centres; the
    outer two edges are extrapolated by reflection about the first and
    last centres.

    Examples
    --------
    >>> print find_wa_edges([1, 2.1, 3.3, 4.6])
    [ 0.45  1.55  2.7   3.95  5.25]
    """
    wa = np.asarray(wa)
    # midpoints between adjacent bin centres
    midpts = 0.5 * (wa[:-1] + wa[1:])
    first = 2 * wa[0] - midpts[0]
    last = 2 * wa[-1] - midpts[-1]
    return np.concatenate([[first], midpts, [last]])
def make_wa_scale(wstart, dw, npts, constantdv=False, verbose=False):
    """ Generates a wavelength scale from the wstart, dw, and npts
    values.

    If `constantdv` is true, `wstart` and `dw` are interpreted as
    log10(Angstroms) and the returned scale is log-linear (constant
    velocity width per pixel).

    >>> print make_wa_scale(40, 1, 5)
    [40., 41., 42., 43., 44.]
    >>> print make_wa_scale(3.5, 1e-3, 5, constantdv=True)
    [3162.278, 3169.568, 3176.874, 3184.198, 3191.537]
    """
    offsets = dw * np.arange(npts, dtype=float)
    if constantdv:
        if verbose:
            print('make_wa_scale(): Using log-linear scale')
        return 10 ** (wstart + offsets)
    if verbose:
        print('make_wa_scale(): Using linear scale')
    return wstart + offsets
class Spectrum(object):
    """ A class to hold information about a spectrum.

    Attributes
    ----------
    wa : array of floats, shape(N,)
        Wavelength values (overrides all wavelength keywords)
    fl : array of floats, shape(N,)
        Flux.
    er : array of floats, shape(N,)
        Error.
    co : array of floats, shape(N,)
        Continuum.
    dw : float
        Wavelength difference between adjacent pixel centres.
    dv : float
        Velocity difference (km/s)
    fwhm : float
        Instrumental FWHM in km/s
    filename : str
        Filename of spectrum

    Notes
    -----
    If enough information is given, the wavelength scale will be
    generated. Note that there is no error check if you give
    conflicting wavelength scale information in the keywords! In this
    case certain combinations of keywords take precedence. See the
    code comments for details.

    Notes for FITS header::

     wstart = CRVAL - (CRPIX - 1.0) * CDELT, dw = CDELT

    Conversion between velocity width and log-linear pixel width::

     dv / c_kms = 1 - 10**(-dw)

    Examples
    --------
    >>> sp = Spectrum(wstart=4000, dw=1, npts=500)
    >>> sp = Spectrum(wstart=4000, dv=60, npts=500)
    >>> sp = Spectrum(wstart=4000, wend=4400, npts=500)
    >>> wa = np.linspace(4000, 5000, 500)
    >>> fl = np.ones(len(wa))
    >>> sp = Spectrum(wa=wa, fl=fl)
    >>> sp = Spectrum(CRVAL=4000, CRPIX=1, CDELT=1, fl=np.ones(500))
    """
    def __init__(self,
                 dw=None, dv=None, wstart=None, wend=None, npts=None,
                 CRVAL=None, CRPIX=None, CDELT=None,
                 wa=None, fl=None, er=None, co=None,
                 fwhm=None, filename=None):
        """ Create the wavelength scale and initialise attributes."""
        # Copy the input arrays as floats: the NaN-masking below must
        # not clobber the caller's arrays, and assigning NaN into an
        # integer array would raise an error.
        if fl is not None:
            fl = np.array(fl, dtype=float)
            fl[np.isinf(fl)] = np.nan
            self.fl = fl
            npts = len(fl)
        if er is not None:
            er = np.array(er, dtype=float)
            # replace bad values with NaN
            er[np.isinf(er) | (er <= 0.)] = np.nan
            self.er = er
            npts = len(er)
        if co is not None:
            co = np.array(co, dtype=float)
            co[np.isinf(co)] = np.nan
            self.co = co
            npts = len(co)

        # Check whether we need to make a wavelength scale.
        makescale = True

        if dv is not None:
            # velocity width -> log-linear pixel width
            dw = np.log10(1. / (1. - dv / c_kms))
        if None not in (CRVAL, CRPIX, CDELT):
            wstart = CRVAL - (CRPIX - 1.0) * CDELT
            dw = CDELT
            # check if it's log-linear scale (heuristic)
            if CRVAL < 10:
                wstart = 10**wstart
                # dv / c_kms = 1 - 10**(-dw)  (see class notes)
                dv = c_kms * (1. - 1. / 10. ** dw)
        if wa is not None:
            # an explicit wavelength array overrides everything else
            wa = np.asarray(wa, float)
            npts = len(wa)
            makescale = False
        elif None not in (wstart, dw, npts):
            # make_wa_scale() expects wstart in log10 units for a
            # log-linear (constant dv) scale
            if dv is not None:
                wstart = np.log10(wstart)
        elif None not in (wstart, wend, dw):
            if dv is not None:
                wstart, wend = np.log10([wstart,wend])
            # make sure the scale is the same or bigger than the
            # requested wavelength range
            npts = int(np.ceil((wend - wstart) / float(dw)))
        elif None not in (wstart, wend, npts):
            # Make a linear wavelength scale
            dw = (wend - wstart) / (npts - 1.0)
        elif None not in (wend, dw, npts):
            raise ValueError('Please specify wstart instead of wend')
        else:
            raise ValueError('Not enough info to make a wavelength scale!')

        if makescale:
            if debug:
                print('making wav scale, %s %s %s %s' % (
                    wstart, dw, npts, bool(dv)))
            wa = make_wa_scale(wstart, dw, npts, constantdv=bool(dv))
        else:
            # check whether wavelength scale is linear or log-linear
            # (constant velocity)
            diff = wa[1:] - wa[:-1]
            if np.allclose(diff, diff[0]):
                dw = np.median(diff)
            else:
                diff = np.log10(wa[1:]) - np.log10(wa[:-1])
                if np.allclose(diff, diff[0]):
                    dw = np.median(diff)
                    dv = c_kms * (1. - 1. / 10. ** dw)

        # assign remaining attributes
        if fl is None:
            self.fl = np.zeros(npts)
        if er is None:
            self.er = np.empty(npts) * np.nan    # error (one sig)
        if co is None:
            self.co = np.empty(npts) * np.nan
        self.fwhm = fwhm
        self.dw = dw
        self.dv = dv
        self.filename = filename
        self.wa = wa

    def __repr__(self):
        return 'Spectrum(wa, fl, er, co, dw, dv, fwhm, filename)'

    def multiply(self, val):
        """ Multipy the flux, error and continuum by `val`.

        >>> sp = Spectrum(wstart=4000, dw=1, npts=500, fl=np.ones(500))
        >>> sp.multiply(2)
        """
        self.fl *= val
        self.er *= val
        self.co *= val

    def plot(self, ax=None, show=True, yperc=0.98, alpha=0.8,
             linewidth=1., linestyle='steps-mid',
             flcolor='blue', cocolor='red'):
        """ Plots a spectrum.

        Returns the matplotlib artists that represent the flux, error
        and continuum curves.
        """
        f,e,c,w = self.fl, self.er, self.co, self.wa
        return plot(w, f, e, c, ax=ax, show=show, yperc=yperc, alpha=alpha,
                    linewidth=linewidth, linestyle=linestyle,
                    flcolor=flcolor, cocolor=cocolor)

    def stats(self, wa1, wa2, show=False):
        """Calculates statistics (mean, standard deviation (i.e. RMS), mean
        error, etc) of the flux between two wavelength points.

        Returns::

         mean flux, RMS of flux, mean error, SNR:
             SNR = (mean flux / RMS)
        """
        i,j = self.wa.searchsorted([wa1, wa2])
        fl = self.fl[i:j]
        er = self.er[i:j]
        # ignore pixels with bad errors or NaN flux
        good = (er > 0) & ~np.isnan(fl)
        if len(good.nonzero()[0]) == 0:
            print('No good data in this range!')
            return np.nan, np.nan, np.nan, np.nan
        fl = fl[good]
        er = er[good]
        mfl = fl.mean()
        std = fl.std()
        mer = er.mean()
        snr = mfl / std
        if show:
            print('mean %g, std %g, er %g, snr %g' % (mfl, std, mer, snr))
        return mfl, std, mer, snr

    def rebin(self, **kwargs):
        """ Class method version of spec.rebin() """
        return rebin(self.wa, self.fl, self.er, **kwargs)

    def rebin_simple(self, n):
        """ Class method version of spec.rebin_simple()."""
        return rebin_simple(self.wa, self.fl, self.er, self.co, n)

    def write(self, filename, header=None, overwrite=False):
        """ Writes out a spectrum, as ascii - wavelength, flux, error,
        continuum.

        `overwrite` can be True or False.

        `header` is a string to be written to the file before the
        spectrum. A special case is `header='RESVEL'`, which means the
        instrumental fwhm in km/s will be written on the first line
        (VPFIT style).
        """
        if os.path.lexists(filename) and not overwrite:
            # prompt before clobbering an existing file
            try:
                ask = raw_input          # Python 2
            except NameError:
                ask = input              # Python 3
            c = ask('File %s exists - overwrite? (y) or n: ' % filename)
            if c != '':
                if c.strip().lower()[0] == 'n':
                    print('returning without writing anything...')
                    return
        fh = open(filename, 'w')
        if header is not None:
            if header == 'RESVEL':
                if self.fwhm is None:
                    raise ValueError('Instrumental fwhm is not set!')
                fh.write('RESVEL %.2f' % self.fwhm)
            else:
                fh.write(header)
        # replace NaNs with zeros in the output
        fl = np.nan_to_num(self.fl)
        er = np.nan_to_num(self.er)
        if np.all(np.isnan(self.co)):
            for w,f,e in zip(self.wa, fl, er):
                fh.write("% .12g % #12.8g % #12.8g\n" % (w,f,e))
        else:
            co = np.nan_to_num(self.co)
            for w,f,e,c in zip(self.wa, fl, er, co):
                fh.write("% .12g % #12.8g % #12.8g % #12.8g\n" % (w,f,e,c))
        fh.close()
        if self.filename is None:
            self.filename = filename
def read(filename, comment='#', debug=False):
    """
    Reads in QSO spectrum given a filename. Returns a Spectrum class
    object.

    Both plain ascii files (optionally gzipped, optionally with a
    VPFIT-style 'RESVEL' first line) and several fits formats (record
    array extensions, SDSS, Makee/HIRES, UVES_popler) are recognised.

    Parameters
    ----------
    filename : str
    comment : str ('#')
      String that marks beginning of comment line, only used when
      reading in ascii files
    """
    if filename.endswith('.gz'):
        import gzip
        fh = gzip.open(filename)
    else:
        fh = open(filename,'r')
    # sniff the first line to decide between ascii and fits
    test = next(fh)
    fh.close()
    if len(test) < 9 or test[8] != '=':
        # Then probably not a fits file (fits header cards have '='
        # at index 8).
        fwhm = None
        skip = 0
        test = test.split()
        try:
            name = test[0].strip()
        except IndexError:
            pass
        else:
            # VPFIT-style first line giving the instrumental fwhm
            if name.upper() == 'RESVEL':
                fwhm = float(test[1])
                skip = 1
        try:   # uves_popler .dat file
            wa,fl,er,co = np.loadtxt(filename, usecols=(0,1,2,4), unpack=True,
                                     comments=comment, skiprows=skip)
        except IndexError:
            try:
                wa,fl,er,co = np.loadtxt(filename, usecols=(0,1,2,3),
                                         unpack=True, comments=comment,
                                         skiprows=skip)
            except IndexError:
                try:
                    wa,fl,er = np.loadtxt(filename, usecols=(0,1,2),
                                          unpack=True, comments=comment,
                                          skiprows=skip)
                except IndexError:
                    # only wavelength and flux; make rough error and
                    # continuum estimates
                    wa,fl = np.loadtxt(filename, usecols=(0,1),
                                       unpack=True, comments=comment,
                                       skiprows=skip)
                    er = find_err(fl, find_cont(fl))
                    co = None
            else:
                # heuristic to check for Jill Bechtold's FOS spectra
                if filename.endswith('.XY'):
                    wa,fl,er,co = np.loadtxt(
                        filename, usecols=(0,1,2,3), unpack=True,
                        comments=comment, skiprows=skip)
                else:
                    # flux and error are normalised; scale by the
                    # continuum column
                    fl *= co
                    er *= co
        if wa[0] > wa[-1]:
            # ensure wavelengths increase
            wa = wa[::-1]; fl = fl[::-1];
            if er is not None: er = er[::-1]
            if co is not None: co = co[::-1]
        sp = Spectrum(wa=wa, fl=fl, er=er, co=co, filename=filename, fwhm=fwhm)
        return sp

    # Otherwise assume fits file
    import pyfits
    f = pyfits.open(filename)
    hd = f[0].header
    # try record array in the first extension
    try:
        data = f[1].data
    except IndexError:
        pass
    else:
        good = False
        names = data.dtype.names
        if 'wa' in names and 'fl' in names:
            wa = data.wa
            fl = data.fl
            good = True
        elif 'wavelength' in names and 'flux' in names:
            wa = data.wavelength
            fl = data.flux
            good = True
        if good:
            # error and continuum columns are optional
            er = np.ones_like(fl)
            try:
                er = data.er
            except AttributeError:
                pass
            co = np.empty_like(fl) * np.nan
            try:
                co = data.co
            except AttributeError:
                pass
            return Spectrum(wa=wa, fl=fl, er=er, co=co, filename=filename)

    ##################################################################
    # First generate the wavelength scale. Look for CTYPE, CRVAL,
    # CDELT header cards. Then read in flux values, and look for
    # cont and error axes/files.
    ##################################################################
    #naxis1 = hd['NAXIS1']    # 1st axis length (no. data points)
    # pixel stepsize
    if 'CDELT1' in hd:
        cdelt = hd['CDELT1']
    elif 'CD1_1' in hd:
        cdelt = hd['CD1_1']
    else:
        # Read Songaila's spectra
        wa = f[0].data[0]
        fl = f[0].data[1]
        npts = len(fl)
        try:
            er = f[0].data[2]
            # If less than half the pixels have a positive error, the
            # third row probably isn't the error; try the fourth.
            if len(er[er > 0]) < 0.5 * npts:
                er = f[0].data[3]
        except IndexError:
            # no error array; estimate it from the flux scatter in a
            # 100-pixel window
            i = int(npts * 0.75)
            er = np.ones(npts) * np.std(fl[i-50:i+50])
        f.close()
        return Spectrum(wa=wa, fl=fl, er=er, filename=filename)
    ##########################################################
    # Check if SDSS spectrum
    ##########################################################
    if 'TELESCOP' in hd:
        if hd['TELESCOP'] == 'SDSS 2.5-M':        # then Sloan spectrum
            data = f[0].data
            fl = data[0]
            er = data[2]
            f.close()
            return Spectrum(fl=fl, er=er, filename=filename, CDELT=cdelt,
                            CRVAL=hd['CRVAL1'], CRPIX=hd['CRPIX1'])
    ##########################################################
    # Check if HIRES spectrum
    ##########################################################
    if 'INSTRUME' in hd:           # Check if Keck spectrum
        if hd['INSTRUME'].startswith('HIRES'):
            if debug:  print('Looks like Makee output format')
            fl = f[0].data                     # Flux
            f.close()
            # Makee writes the error to a matching '...e.fits' file
            errname = filename[0:filename.rfind('.fits')] + 'e.fits'
            try:
                er = pyfits.getdata(errname)
            except IOError:
                er = np.ones(len(fl))
            return Spectrum(fl=fl, er=er, filename=filename, CDELT=cdelt,
                            CRVAL=hd['CRVAL1'], CRPIX=hd['CRPIX1'])
    ##########################################################
    # Check if UVES_popler output
    ##########################################################
    history = hd.get_history()
    for row in history:
        if 'UVES POst Pipeline Echelle Reduction' in row:
            # rows are normalised flux, error and the continuum
            co = f[0].data[3]
            fl = f[0].data[0] * co             # Flux
            er = f[0].data[1] * co
            f.close()
            return Spectrum(fl=fl, er=er, co=co, filename=filename, CDELT=cdelt,
                            CRVAL=hd['CRVAL1'], CRPIX=hd['CRPIX1'])
    # fall back: assume rows are flux, (something), error
    data = f[0].data
    fl = data[0]
    er = data[2]
    f.close()
    if 'CRPIX1' in hd:
        crpix = hd['CRPIX1']
    else:
        crpix = 1
    return Spectrum(fl=fl, er=er, filename=filename, CDELT=cdelt,
                    CRVAL=hd['CRVAL1'], CRPIX=crpix)
    #raise Exception('Unknown file format')
def rebin_simple(wa, fl, er, co, n):
    """ Bins up the spectrum by averaging the values of every n
    pixels.  Not very accurate, but much faster than rebin().

    Trailing pixels (when the array length is not a multiple of n) are
    dropped.  Errors are averaged and divided by sqrt(n).  Returns a
    new Spectrum.
    """
    # drop trailing pixels so the length is a multiple of n
    trim = -(len(wa) % n) or None
    scale = float(n)

    def _blockmean(vals):
        # nansum/n over each group of n pixels
        return np.nansum(vals[:trim].reshape(-1, n), axis=1) / scale

    binned_wa = _blockmean(wa)
    binned_co = _blockmean(co)
    binned_er = _blockmean(er) / sqrt(scale)
    binned_fl = _blockmean(fl)
    return Spectrum(wa=binned_wa, fl=binned_fl, er=binned_er, co=binned_co)
def rebin(wav, fl, er, **kwargs):
    """ Rebins spectrum to a new wavelength scale generated using the
    keyword parameters.

    Parameters
    ----------
    wav : array of floats, shape (N,)
        Old wavelength scale (bin centres).
    fl : array of floats, shape (N,)
        Flux.
    er : array of floats, shape (N,)
        One sigma error.

    Returns the rebinned spectrum.

    Accepts the same keywords as Spectrum.__init__() (see that
    docstring for a description of those keywords)

    Will probably get the flux and errors for the first and last pixel
    of the rebinned spectrum wrong.

    General pointers about rebinning if you care about errors in the
    rebinned values:

    1. Don't rebin to a smaller bin size.
    2. Be aware when you rebin you introduce correlations between
       neighbouring points and between their errors.
    3. Rebin as few times as possible.
    """
    # Note: 0 suffix indicates the old spectrum, 1 the rebinned spectrum.
    colors= 'brgy'
    debug = kwargs.pop('debug', False)
    # Create rebinned spectrum wavelength scale
    sp1 = Spectrum(**kwargs)
    # find pixel edges, used when rebinning
    edges0 = find_wa_edges(wav)
    edges1 = find_wa_edges(sp1.wa)
    if debug:
        pl.clf()
        x0,x1 = edges1[0:2]
        yh, = pl.bar(x0, 0, width=(x1-x0),color='gray',
                     linestyle='dotted',alpha=0.3)
    widths0 = edges0[1:] - edges0[:-1]
    npts0 = len(wav)
    npts1 = len(sp1.wa)
    # df and de2 accumulate the pixel-fraction-weighted flux and
    # variance contributions to the current rebinned pixel.
    df = 0.
    de2 = 0.
    npix = 0    # number of old pixels contributing to rebinned pixel,
    j = 0                # index of rebinned array
    i = 0                # index of old array
    # sanity check
    if edges0[-1] < edges1[0] or edges1[-1] < edges0[0]:
        raise ValueError('Wavelength scales do not overlap!')
    # find the first contributing old pixel to the rebinned spectrum
    if edges0[i+1] < edges1[0]:
        # Old wa scale extends lower than the rebinned scale. Find the
        # first old pixel that overlaps with rebinned scale.
        while edges0[i+1] < edges1[0]:
            i += 1
        i -= 1
    elif edges0[0] > edges1[j+1]:
        # New rebinned wa scale extends lower than the old scale. Find
        # the first rebinned pixel that overlaps with the old spectrum
        while edges0[0] > edges1[j+1]:
            sp1.fl[j] = np.nan
            sp1.er[j] = np.nan
            j += 1
        j -= 1
    lo0 = edges0[i]      # low edge of contr. (sub-)pixel in old scale
    # Walk through both scales simultaneously, splitting old pixels at
    # rebinned-pixel boundaries.
    while True:
        hi0 = edges0[i+1]   # upper edge of contr. (sub-)pixel in old scale
        hi1 = edges1[j+1]   # upper edge of jth pixel in rebinned scale
        if hi0 < hi1:
            # The old pixel ends below the current rebinned pixel edge:
            # accumulate its remaining fraction and advance to the next
            # old pixel.  Pixels with bad errors (er <= 0 or NaN) are
            # skipped.
            if er[i] > 0:
                dpix = (hi0 - lo0) / widths0[i]
                df += fl[i] * dpix
                # We don't square dpix below, since this causes an
                # artificial variation in the rebinned errors depending on
                # how the old wav bins are divided up into the rebinned
                # wav bins.
                #
                # i.e. 0.25**2 + 0.75**2 != 0.5**2 + 0.5**2 != 1**2
                de2 += er[i]**2 * dpix
                npix += dpix
            if debug:
                yh.set_height(df/npix)
                c0 = colors[i % len(colors)]
                pl.bar(lo0, fl[i], width=hi0-lo0, color=c0, alpha=0.3)
                pl.text(lo0, fl[i], 'lo0')
                pl.text(hi0, fl[i], 'hi0')
                pl.text(hi1, fl[i], 'hi1')
                raw_input('enter...')
            lo0 = hi0
            i += 1
            if i == npts0:  break
        else:
            # We have all old pixel flux values that contribute to the
            # new pixel; append the new flux value and move to the
            # next new pixel.
            if er[i] > 0:
                dpix = (hi1 - lo0) / widths0[i]
                df += fl[i] * dpix
                de2 += er[i]**2 * dpix
                npix += dpix
            if debug:
                yh.set_height(df/npix)
                c0 = colors[i % len(colors)]
                pl.bar(lo0, fl[i], width=hi1-lo0, color=c0, alpha=0.3)
                pl.text(lo0, fl[i], 'lo0')
                pl.text(hi0, fl[i], 'hi0')
                pl.text(hi1, fl[i], 'hi1')
                raw_input('df, de2, npix: %s %s %s enter...' %
                          (df, de2, npix))
            if npix > 0:
                # find total flux and error, then divide by number of
                # pixels (i.e. conserve flux density).
                sp1.fl[j] = df / npix
                sp1.er[j] = sqrt(de2) / npix
            else:
                # no good old pixels contributed to this rebinned pixel
                sp1.fl[j] = np.nan
                sp1.er[j] = np.nan
            # reset the accumulators for the next rebinned pixel
            df = 0.
            de2 = 0.
            npix = 0.
            lo0 = hi1
            j += 1
            if j == npts1:  break
            if debug:
                x0,x1 = edges1[j:j+2]
                yh, = pl.bar(x0, 0, width=x1-x0, color='gray',
                             linestyle='dotted', alpha=0.3)
                raw_input('enter...')
    return sp1
def combine(spectra, cliphi=None, cliplo=None, verbose=False):
    """ Combine spectra pixel by pixel, weighting by the inverse variance
    of each pixel.  Clip high sigma values by sigma times clip values.

    Returns the combined spectrum.

    If the wavelength scales of the input spectra differ, combine()
    will rebin the spectra to a common linear (not log-linear)
    wavelength scale, with pixel width equal to the largest pixel
    width in the input spectra. If this is not what you want, rebin
    the spectra by hand with rebin() before using combine().
    """
    def clip(cliphi, cliplo, s_rebinned):
        # Clip the rebinned input spectra.  Only pixels with at least
        # three good contributing values can be clipped.
        goodpix = np.zeros(len(s_rebinned[0].wa))
        for s in s_rebinned:
            goodpix += (s.er > 0).astype(int)
        # integer indices of pixels where clipping is possible
        canclip = np.flatnonzero(goodpix > 2)
        # find median values
        medfl = np.median([s.fl[canclip] for s in s_rebinned], axis=0)
        nclipped = 0
        for s in s_rebinned:
            diff = (s.fl[canclip] - medfl) / s.er[canclip]
            # Assign through the integer indices: chained boolean
            # indexing (er[canclip][badpix] = ...) writes to a copy
            # and silently does nothing.
            if cliphi is not None:
                bad = canclip[diff > cliphi]
                s.er[bad] = np.nan
                nclipped += len(bad)
            if cliplo is not None:
                bad = canclip[diff < -cliplo]
                s.er[bad] = np.nan
                nclipped += len(bad)
        if debug:
            print('%s pixels clipped across all input spectra' % nclipped)
        return nclipped

    nspectra = len(spectra)
    if verbose:
        print('%s spectra to combine' % nspectra)
    if nspectra < 2:
        raise Exception('Need at least 2 spectra to combine.')
    # sigma clipping needs at least three values per pixel
    if cliphi is not None and nspectra < 3:  cliphi = None
    if cliplo is not None and nspectra < 3:  cliplo = None

    # Check if wavescales are the same:
    spec0 = spectra[0]
    wa = spec0.wa
    npts = len(wa)
    needrebin = True
    for sp in spectra:
        if len(sp.wa) != npts:
            if verbose:  print('Rebin required')
            break
        if (np.abs(sp.wa - wa) / wa[0]).max() > 1e-8:
            if verbose:
                print('%s Rebin required' % (np.abs(sp.wa - wa) / wa[0]).max())
            break
    else:
        needrebin = False
        if verbose:  print('No rebin required')

    if needrebin:
        # Make wavelength scale for combined spectrum. Only linear for now.
        wstart = min(sp.wa[0] for sp in spectra)
        wend = max(sp.wa[-1] for sp in spectra)
        # Choose largest wavelength bin size of old spectra.
        if verbose:  print('finding new bin size')
        maxwidth = max((sp.wa[1:] - sp.wa[:-1]).max() for sp in spectra)
        npts = int(np.ceil((wend - wstart) / maxwidth))    # round up
        # rebin spectra to combined wavelength scale
        if verbose:  print('Rebinning spectra')
        s_rebinned = [s.rebin(wstart=wstart, npts=npts, dw=maxwidth)
                      for s in spectra]
        combined = Spectrum(wstart=wstart, npts=npts, dw=maxwidth)
        if verbose:
            print('New wavelength scale wstart=%s, wend=%s, npts=%s, dw=%s'
                  % (wstart, combined.wa[-1], npts, maxwidth))
    else:
        combined = Spectrum(wa=spec0.wa)
        # deep copy so clipping below doesn't modify the input spectra
        s_rebinned = copy.deepcopy(spectra)

    # sigma clipping, if requested
    if cliphi is not None or cliplo is not None:
        clip(cliphi, cliplo, s_rebinned)
        # repeat, clipping to 4 sigma this time
        #npixclipped = clip(4.,4.,s_rebinned)

    # Now add the spectra, weighting each pixel by its inverse variance
    for i in range(len(combined.wa)):
        wtot = fltot = ertot = 0.
        npix = 0     # num of old spectrum pixels contributing to new
        for s in s_rebinned:
            # if not a sensible flux value, skip to the next pixel
            if s.er[i] > 0:
                npix += 1
                # Weighted mean (weight by inverse variance)
                variance = s.er[i] ** 2
                w = 1. / variance
                fltot += s.fl[i] * w
                ertot += (s.er[i] * w)**2
                wtot += w
        if npix > 0:
            combined.fl[i] = fltot / wtot
            combined.er[i] = np.sqrt(ertot) / wtot
        else:
            combined.fl[i] = np.nan
            combined.er[i] = np.nan
    #contributing.fl[i] = npix_contrib
    return combined
def cr_reject(flux, error, nsigma=15.0, npix=2, verbose=False):
    """ Given flux and errors, rejects cosmic-ray type or dead
    pixels.  These are defined as pixels that are more than
    nsigma*sigma above or below the median of the npix pixels on
    either side.

    Returns newflux,newerror where the rejected pixels have been
    replaced by the median value of npix to either side, and the
    error has been set to NaN.

    The default values work ok for S/N~20, Resolution=500 spectra.
    """
    if verbose:
        print('%s %s' % (nsigma, npix))
    flux, error = list(flux), list(error)        # make copies
    i1 = npix
    i2 = len(flux) - npix
    for i in range(i1, i2):
        # make a list of flux values used to find the median, keeping
        # only the neighbours with good (positive) errors
        fl = flux[i-npix:i] + flux[i+1:i+1+npix]
        er = error[i-npix:i] + error[i+1:i+1+npix]
        fl = [f for f,e in zip(fl,er) if e > 0]
        er = [e for e in er if e > 0]
        medfl = np.median(fl)
        meder = np.median(er)
        if np.abs((flux[i] - medfl) / meder) > nsigma:
            # deviant pixel: replace the flux and flag the error
            flux[i] = medfl
            error[i] = np.nan
        if verbose:
            print('%s %s' % (len(fl), len(er)))
    return np.array(flux), np.array(error)
def cr_reject2(fl, er, nsig=10.0, fwhm=2, grow=1, debug=True):
    """ interpolate across features that have widths smaller than the
    expected fwhm resolution.

    Parameters
    ----------
    fwhm: int
      Resolution fwhm in pixels
    fl : array of floats, shape (N,)
      Flux
    er : array of floats, shape (N,)
      Error

    Returns the interpolated flux and error arrays.
    """
    fl, er = (np.array(a, dtype=float) for a in (fl, er))
    # pixels deviating from the smoothed flux by more than nsig*er are
    # treated as bad
    smoothed = convolve_psf(fl, fwhm)
    ibad = np.where(np.abs(smoothed - fl) > nsig * er)[0]
    if debug:
        print(len(ibad))
    # grow the bad regions by `grow` pixels on each side
    above = np.concatenate([ibad + 1 + k for k in range(grow)])
    below = np.concatenate([ibad - 1 - k for k in range(grow)])
    ibad = np.union1d(ibad, np.union1d(above, below))
    ibad = ibad[(ibad > -1) & (ibad < len(fl))]
    igood = np.setdiff1d(np.arange(len(smoothed)), ibad)
    # interpolate over the bad pixels and flag their errors
    fl[ibad] = np.interp(ibad, igood, fl[igood])
    er[ibad] = np.nan
    return fl, er
def scalemult(w0, f0, e0, w1, f1, e1, mask=None):
    """ find the constant to multipy f1 by so its median will match
    f0 where they overlap in wavelength.

    Errors are just used to identify bad pixels, if they are all > 0
    they are ignored.

    `mask` is optional, and of the form `[(3500,4500), (4650,4680)]`
    to mask wavelengths from 3500 to 4500 Ang, and 4650 to 4680 Ang
    for example.

    Raises ValueError if either spectrum has fewer than 3 good
    pixels in the overlap, or if either median is not positive.
    """
    w0,f0,e0,w1,f1,e1 = map(np.asarray, [w0,f0,e0,w1,f1,e1])
    masked0 = np.zeros(len(w0), bool)
    # note the mask for spectrum 1 must match the length of w1, not w0
    masked1 = np.zeros(len(w1), bool)
    if mask is not None:
        for wmin,wmax in mask:
            masked0 |= (wmin < w0) & (w0 < wmax)
            masked1 |= (wmin < w1) & (w1 < wmax)
    # overlapping wavelength range
    wmin = max(w0.min(), w1.min())
    wmax = min(w0.max(), w1.max())
    good0 = (e0 > 0) & ~np.isnan(f0) & ~masked0 & (wmin < w0) & (w0 < wmax)
    good1 = (e1 > 0) & ~np.isnan(f1) & ~masked1 & (wmin < w1) & (w1 < wmax)
    if good0.sum() < 3 or good1.sum() < 3:
        raise ValueError('Too few good pixels to use for scaling')
    med0 = np.median(f0[good0])
    med1 = np.median(f1[good1])
    if not (med0 > 0) or not (med1 > 0):
        raise ValueError('bad medians:', med0, med1)
    return med0 / med1
def scale_overlap(w0, f0, e0, w1, f1, e1):
    """ Scale two spectra to match where they overlap. Assumes
    spectrum 0 covers a lower wavelength range than spectrum 1. If no
    good regions overlap, regions close to the overlap are searched
    for.

    Parameters
    ----------
    w0, f0, e0 : arrays of floats
        Wavelength, flux and one sigma error for spectrum 0.
    w1, f1, e1 : arrays of floats
        Wavelength, flux and one sigma error for spectrum 1.

    Returns::

     scale_factor : float
       Multiply spectrum 1 by scale_factor to match spectrum 0.
    """
    # find overlapping regions
    dtype = [('wa','f8'),('fl','f8'),('er','f8')]
    sp0 = np.rec.fromarrays([w0,f0,e0], dtype=dtype)
    sp1 = np.rec.fromarrays([w1,f1,e1], dtype=dtype)
    if sp0.wa.max() < sp1.wa.min():
        raise ValueError('No overlap!')
    # find first overlapping good pixel
    i = 0
    while np.isnan(sp1.er[i]): i += 1
    i0min = sp0.wa.searchsorted(sp1.wa[i])
    i = -1
    while np.isnan(sp0.er[i]): i -= 1
    i1max = sp1.wa.searchsorted(sp0.wa[i])
    # Grow the comparison regions until both contain good pixels and
    # have positive median fluxes.
    while True:
        s0 = sp0[i0min:]
        s1 = sp1[:i1max]
        good0 = (s0.er > 0) & ~np.isnan(s0.fl)
        good1 = (s1.er > 0) & ~np.isnan(s1.fl)
        if good0.sum() == 0 or good1.sum() == 0:
            # no good pixels; roughly double the size of each region
            i0min = i0min - (len(sp0) - i0min)
            i1max = 2 * i1max
            if i0min < 0 or i1max > len(sp1)-1:
                raise ValueError('No good pixels to use for scaling')
            continue
        m0, m1 = np.median(s0.fl[good0]), np.median(s1.fl[good1])
        if m0 <= 0:
            # median not positive; widen region 0 (clamped to the start)
            print 'looping'
            i0min = max(0, i0min - (len(sp0) - i0min))
        elif m1 <= 0:
            # median not positive; widen region 1 (clamped to the end)
            print 'looping'
            i1max = min(len(sp1)-1, 2 * i1max)
        else:
            break
    if debug: print m0,m1
    return m0 / m1
def plotlines(z, ax, atmos=None, lines=None, labels=False, ls='dotted',
              color='k', trim=False, fontsize=10, **kwargs):
    """ Draw vertical dotted lines showing expected positions of
    absorption and emission lines, given a redshift.

    Parameters
    ----------
    z : float
        Redshift; rest wavelengths are multiplied by (1 + z).
    ax : matplotlib axes
        The axes to draw on.
    atmos : list of float pairs, or True (None)
        Regions of atmospheric absorption to plot. If True, it uses an
        internal list of regions.
    lines : stuctured array, optional
        If given, it must be a record array with fields 'name' and 'wa'.
    labels : bool (False)
        If True, also draw rotated text labels with the line names.
    trim : bool (False)
        If True, only plot lines inside the current x-axis limits.

    Returns the mpl artists representing the lines.
    """
    if lines is None:
        # default to the galaxy line list shipped with the package data
        lines = readtxt(DATAPATH + 'linelists/galaxy_lines', names='wa,name,select')
    else:
        lines = np.rec.fromrecords([(l['name'], l['wa']) for l in lines],
                                   names='name,wa')
    # temporarily disable autoscaling so the full-height lines don't
    # change the existing plot limits
    autoscale = ax.get_autoscale_on()
    if autoscale:
        ax.set_autoscale_on(False)
    artists = []
    w0,w1 = ax.get_xlim()
    # observed wavelengths
    wa = lines.wa * (z+1)
    if trim:
        c0 = between(wa,w0,w1)
        wa = wa[c0]
        lines = lines[c0]
    artists.append(axvlines(wa, ax=ax, ls=ls, color=color, **kwargs))
    if labels:
        # stagger the labels at three heights so they don't overlap
        for i in range(3):
            for w,l in zip(wa[i::3], lines[i::3]):
                if not (w0 < w < w1) and trim:
                    continue
                #name = l.name + '%.2f' % l.wa
                name = l.name
                artists.append(puttext(
                    w, 0.7 + i*0.08, name, ax,
                    xcoord='data', alpha=1, fontsize=fontsize,
                    rotation=90, ha='right', color=color))
    if atmos:
        # atmos == True means "use the default internal region list"
        if atmos == True:
            atmos = None
        artists.append(plotatmos(ax, atmos=atmos))
    if autoscale:
        ax.set_autoscale_on(True)
    return artists
def plotatmos(ax, atmos=None, color='y'):
    """ Shade wavelength regions where atmospheric absorption is
    roughly expected.

    `atmos` is an optional list of (start, end) wavelength pairs; if
    not given, an internal list of regions is used.  Returns the
    matplotlib artists for the shaded regions.
    """
    # remember the autoscale state so the full-height fills don't
    # change the plot limits, and restore it afterwards
    autoscale = ax.get_autoscale_on()
    if autoscale:
        ax.set_autoscale_on(False)
    if atmos is None:
        # rough default telluric regions (Angstroms)
        atmos = [(5570, 5590),
                 (5885, 5900),
                 (6275, 6325),
                 (6870, 6950),
                 (7170, 7350),
                 (7580, 7690),
                 (8130, 8350),
                 (8900, 9200),
                 (9300, 9850),
                 (11100, 11620),
                 (12590, 12790),
                 (13035, 15110),
                 (17435, 20850),
                 (24150, 24800)]
    artists = axvfill(atmos, ax=ax, color=color, alpha=0.15)
    if autoscale:
        ax.set_autoscale_on(True)
    return artists
def plot(w, f=None, e=None, c=None, ax=None, show=True, yperc=0.98, alpha=0.8,
         linewidth=1., linestyle='steps-mid', flcolor='blue', cocolor='red'):
    """ Plots spectrum.

    Returns the matplotlib artists that represent the flux, error
    and continuum curves.

    Can also give a single argument that is a record array with fields
    wa, fl and optionally er, co.

    >>> from utilities import get_data_path
    >>> sp = read(get_data_path() + 'tests/spSpec-52017-0516-139.fit.gz')
    >>> lines = plot(sp.wa, sp.fl, sp.er, sp.co, show=False)
    """
    witherr = True
    if f is None and e is None and c is None:
        # single record-array argument with fields wa, fl and
        # optionally er, co
        rec = w
        w = rec.wa
        f = rec.fl
        try:
            e = rec.er
        except AttributeError:
            e = w * np.nan
            witherr = False
        try:
            c = rec.co
        except AttributeError:
            c = w * np.nan
    elif e is None:
        e = w * np.nan
        witherr = False
    elif c is None:
        c = w * np.nan
    if ax is None:
        # no axes given: make our own figure
        fig = pl.figure(figsize=(10,5))
        fig.subplots_adjust(left=0.08, right=0.94)
        ax = pl.gca()
    artists = []
    # check we have something to plot...
    good = ~np.isnan(f)
    if witherr:
        good &= (e > 0)
    if np.any(good):
        # flux (solid), error (dashed, same colour) and continuum
        artists.extend(ax.plot(w, f, lw=linewidth, color=flcolor,
                               alpha=alpha, ls=linestyle))
        if witherr:
            artists.extend(ax.plot(w, e, lw=linewidth, color=flcolor,
                                   alpha=alpha, ls='dashed'))
        artists.extend(ax.plot(w, c, lw=linewidth, color=cocolor,
                               alpha=alpha))
    # plotting limits: set the y range from the yperc percentile of
    # the good flux values
    f = f[good]
    ymax = 1.5 * np.percentile(f, 100*yperc)
    if witherr:
        e = e[good]
        ymin = min(-0.1 * np.median(f), -1.0 * np.median(e))
    else:
        ymin = -abs(0.1*ymax)
    if ymax < ymin:
        ymax = 2. * abs(ymin)
    ax.axis((w.min(), w.max(), ymin, ymax))
    ax.axhline(y=0., color='k', alpha=alpha)
    if show:
        pl.show()
    return artists
def writesp(filename, sp, resvel=None, overwrite=False):
    """ Writes out a spectrum, as ascii for now - wavelength, flux,
    error, continuum.

    Parameters
    ----------
    filename : str
        Name of the output file.
    sp : object
        Must have attributes wa, fl, er and optionally co.
    resvel : float, optional
        If given, the instrumental fwhm in km/s is written on the
        first line (VPFIT style).
    overwrite : bool (False)
        If False and the file already exists, ask before overwriting.
    """
    if os.path.lexists(filename) and not overwrite:
        # prompt before clobbering an existing file
        try:
            ask = raw_input          # Python 2
        except NameError:
            ask = input              # Python 3
        c = ask('File %s exists - overwrite? (y) or n: ' % filename)
        if c != '' and c.strip().lower()[0] == 'n':
            print('returning without writing anything...')
            return
    # choose the row format depending on whether a continuum is present;
    # NaNs are written as zeros
    if not hasattr(sp, 'co') or np.all(np.isnan(sp.co)):
        rows = zip(sp.wa, np.nan_to_num(sp.fl), np.nan_to_num(sp.er))
        fmt = "% .12g % #12.8g % #12.8g\n"
    else:
        rows = zip(sp.wa, np.nan_to_num(sp.fl), np.nan_to_num(sp.er),
                   np.nan_to_num(sp.co))
        fmt = "% .12g % #12.8g % #12.8g % #12.8g\n"
    # context manager so the file is closed even if a write fails
    with open(filename, 'w') as fh:
        if resvel is not None:
            # NOTE(review): no trailing newline here, so the first data
            # row continues this line - confirm this is intended.
            fh.write('RESVEL %.2f' % resvel)
        for row in rows:
            fh.write(fmt % row)
def find_cont(fl, fwhm1=300, fwhm2=200, nchunks=4):
    """ Given the flux, estimate the continuum. fwhm values are
    smoothing lengths.
    """
    # Smooth the flux with a smoothing length much longer than the
    # expected emission line widths.
    fl = nan2num(fl.astype(float), replace='mean')
    co = convolve_psf(fl, fwhm1, edge=10)
    npts = len(fl)
    indices = np.arange(npts)
    # Throw away the top and bottom 5% of points that deviate from the
    # first-pass continuum, then re-fit. Go chunk by chunk so points
    # are thrown away evenly across the spectrum.
    nfl = fl / co
    step = npts // nchunks + 1
    bounds = list(range(0, npts, step)) + [npts]
    keep = []
    for lo, hi in zip(bounds[:-1], bounds[1:]):
        order = nfl[lo:hi].argsort()
        m = len(order)
        keep.extend(order[int(0.05 * m):int(0.95 * m)] + lo)
    good = np.in1d(indices, keep)
    # interpolate across the discarded pixels before smoothing again
    smoothed = fl.copy()
    smoothed[~good] = np.interp(indices[~good], indices[good],
                                smoothed[good])
    return convolve_psf(smoothed, fwhm2, edge=10)
def find_err(fl, co, nchunks=10):
    """ Given a continuum and flux array, return a very rough estimate
    of the error.

    The spectrum is split into nchunks pieces, and the RMS of the
    (5%-trimmed) flux-continuum residuals in each piece is
    interpolated across the whole spectrum.
    """
    npts = len(fl)
    step = npts // nchunks + 1
    bounds = list(range(0, npts, step)) + [npts]
    midpoints = []
    rms = []
    for lo, hi in zip(bounds[:-1], bounds[1:]):
        midpoints.append(int(0.5 * (lo + hi)))
        resid = np.sort(fl[lo:hi] - co[lo:hi])
        m = len(resid)
        # throw away top and bottom 5% of the residuals
        trimmed = resid[int(0.05 * m):int(0.95 * m)]
        rms.append(trimmed.std())
    return np.interp(np.arange(npts), midpoints, rms)
def pca_qso_cont(nspec, seed=None, return_weights=False):
    """ Make qso continua using the PCA and weights from N. Suzuki et
    al. 2005 and N. Suzuki 2006.

    Parameters
    ----------
    nspec : int
        Number of spectra to create
    seed : int, optional
        Seed for the numpy random number generator, for reproducible
        continua.
    return_weights : bool (False)
        If True, also return the random weights applied to each
        eigenvector.

    Returns
    -------
    wavelength (shape N), array of spectra [shape (nspec, N)]

    Memory use might be prohibitive for nspec > ~1e4.
    """
    # read the principle components
    filename = DATAPATH + '/PCAcont/Suzuki05/tab3.txt'
    names = 'wa,mu,musig,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10'
    co = readtxt(filename, skip=23, names=names)
    # use only the first 7 eigenvetors
    eig = [co['e%i' % i] for i in range(1,8)]
    # sigmas of the weight distributions, from Suzuki et al 2006.
    csig = np.array([7.563, 3.604, 2.351, 2.148, 1.586,
                     1.479, 1.137]) #, 0.778, 0.735, 0.673])
    # generate weights for each eigenvector
    if seed is not None: np.random.seed(seed)
    weights = []
    for sig in csig:
        # draw twice as many values as needed so that enough remain
        # after the 3-sigma cut below
        temp = np.random.randn(2*nspec)
        # make sure we don't have any very large deviations from the mean
        temp = temp[np.abs(temp) < 3][:nspec]
        assert len(temp) == nspec
        weights.append(temp * sig)
    # generate nspec continua. loop over pixels
    # NOTE(review): np.sum over a generator relies on a deprecated
    # numpy fallback; consider passing a list instead.
    sp = []
    for i in range(len(co.wa)):
        sp.append(co.mu[i] + np.sum(w*e[i] for w,e in zip(weights, eig)))
    sp = np.transpose(sp)
    if return_weights:
        return co.wa, sp, weights
    else:
        return co.wa, sp
def vac2air_Ciddor(vacw):
    """ Convert vacuum wavelengths in Angstroms to air wavelengths.

    This uses the relation from Ciddor 1996, Applied Optics LP,
    vol. 35, Issue 9, p.1566. Only valid for wavelengths > 2000 Ang.

    A scalar input gives a scalar result, an array input an array.
    """
    vacw = np.atleast_1d(vacw)
    # squared wavenumber in inverse microns
    s2 = (1e4 / vacw) ** 2
    # refractive index of air from the two-term Ciddor dispersion formula
    term1 = 1e-8 * 5792105. / (238.0185 - s2)
    term2 = 1e-8 * 167917. / (57.362 - s2)
    airw = vacw / (1 + term1 + term2)
    return airw[0] if len(airw) == 1 else airw
def vac2air_Morton(vacw):
    """ Convert vacuum wavelengths in Angstroms to air wavelengths.

    This uses the relation from Morton 1991, ApJS, 77, 119. Only valid
    for wavelengths > 2000 Ang. Use this for compatibility with older
    spectra that may have been corrected using the (older) Morton
    relation. The Ciddor relation used in vac2air_Ciddor() is claimed
    to be more accurate at IR wavelengths.

    A scalar input gives a scalar result, an array input an array.
    """
    vacw = np.atleast_1d(vacw)
    # squared wavenumber in inverse microns
    s2 = (1e4 / vacw) ** 2
    # refractive index of air from the Morton dispersion formula
    n = (1. + 6.4328e-5 + 2.94981e-2/(146 - s2) +
         2.5540e-4/(41 - s2))
    airw = 1. / n * vacw
    return airw[0] if len(airw) == 1 else airw
def air2vac_Morton(airw):
    """ Convert air wavelengths in Angstroms to vacuum wavelengths.

    Uses linear interpolation of the inverse transformation for
    vac2air_Morton. The fractional error (wa - watrue) / watrue
    introduced by interpolation is < 1e-9.

    Only valid for wa > 2000 Angstroms.
    """
    airw = np.atleast_1d(airw)
    if (np.diff(airw) < 0).any():
        raise ValueError('Wavelengths must be sorted lowest to highest')
    if airw[0] < 2000:
        raise ValueError('Only valid for wavelengths > 2000 Angstroms')
    # bracket the requested range generously, then invert vac->air by
    # interpolating on a 2 Angstrom grid
    pad_hi = abs(vac2air_Morton(airw[-1]) - airw[-1]) + 10
    pad_lo = abs(vac2air_Morton(airw[0]) - airw[0]) + 10
    grid_vac = np.arange(airw[0] - pad_lo, airw[-1] + pad_hi, 2)
    grid_air = vac2air_Morton(grid_vac)
    vacw = np.interp(airw, grid_air, grid_vac)
    return vacw[0] if len(vacw) == 1 else vacw
def air2vac_Ciddor(airw):
    """ Convert air wavelengths in Angstroms to vacuum wavelengths.

    Uses linear interpolation of the inverse transformation for
    vac2air_Ciddor. The fractional error (wa - watrue) / watrue
    introduced by interpolation is < 1e-9.

    Only valid for wa > 2000 Angstroms.
    """
    airw = np.atleast_1d(airw)
    if (np.diff(airw) < 0).any():
        raise ValueError('Wavelengths must be sorted lowest to highest')
    if airw[0] < 2000:
        raise ValueError('Only valid for wavelengths > 2000 Angstroms')
    # bracket the requested range generously, then invert vac->air by
    # interpolating on a 2 Angstrom grid
    pad_hi = abs(vac2air_Ciddor(airw[-1]) - airw[-1]) + 10
    pad_lo = abs(vac2air_Ciddor(airw[0]) - airw[0]) + 10
    grid_vac = np.arange(airw[0] - pad_lo, airw[-1] + pad_hi, 2)
    grid_air = vac2air_Ciddor(grid_vac)
    vacw = np.interp(airw, grid_air, grid_vac)
    return vacw[0] if len(vacw) == 1 else vacw