# Source code for muse_origin.origin

import datetime
import glob
import inspect
import logging
import os
import shutil
import sys
import warnings
from collections import OrderedDict
from logging.handlers import RotatingFileHandler

import matplotlib.pyplot as plt
import numpy as np
from astropy.io import fits
from astropy.table import Table
from astropy.utils import lazyproperty
from mpdaf.log import setup_logging
from mpdaf.MUSE import FieldsMap
from mpdaf.obj import Cube, Image

from . import steps
from .lib_origin import timeit
from .version import __version__

try:
    # With PyYAML 5.1, the plain load/dump functions became safe-by-default
    # and the old unsafe behaviour moved to the explicit unsafe_* functions.
    # We need the full (unsafe) loader to be able to dump and reload Python
    # objects, yay!
    from yaml import unsafe_load as load_yaml, dump as dump_yaml
except ImportError:  # pragma: no cover
    from yaml import load as load_yaml, dump as dump_yaml

CURDIR = os.path.dirname(os.path.abspath(__file__))


class ORIGIN(steps.LogMixin):
    """ORIGIN: detectiOn and extRactIon of Galaxy emIssion liNes

    This is the main class to interact with all the steps.  An Origin
    object is mainly composed by:

    - cube data (raw data and covariance)
    - 1D dictionary of spectral profiles
    - MUSE PSF

    Attributes
    ----------
    path : str
        Path where the ORIGIN data will be stored.
    name : str
        Name of the session and basename for the sources.
    param : dict
        Parameters values.
    cube_raw : array (Nz, Ny, Nx)
        Raw data.
    var : array (Nz, Ny, Nx)
        Variance.
    wcs : `mpdaf.obj.WCS`
        RA-DEC coordinates.
    wave : `mpdaf.obj.WaveCoord`
        Spectral coordinates.
    profiles : list of array
        List of spectral profiles to test
    FWHM_profiles : list
        FWHM of the profiles in pixels.
    wfields : None or list of arrays
        List of weight maps (one per fields in the case of MUSE mosaic)
        None: just one field
    PSF : array (Nz, PSF_size, PSF_size) or list of arrays
        MUSE PSF (one per field)
    LBDA_FWHM_PSF : list of floats
        Value of the FWMH of the PSF in pixel for each wavelength step
        (mean of the fields).
    FWHM_PSF : float or list of float
        Mean of the fwhm of the PSF in pixel (one per field).
    imawhite : `~mpdaf.obj.Image`
        White image
    segmap : `~mpdaf.obj.Image`
        Segmentation map
    cube_std : `~mpdaf.obj.Cube`
        standardized data for PCA. Result of step01.
    cont_dct : `~mpdaf.obj.Cube`
        DCT continuum. Result of step01.
    ima_std : `~mpdaf.obj.Image`
        Mean of standardized data for PCA along the wavelength axis.
        Result of step01.
    ima_dct : `~mpdaf.obj.Image`
        Mean of DCT continuum cube along the wavelength axis.
        Result of step01.
    nbAreas : int
        Number of area (segmentation) for the PCA computation.
        Result of step02.
    areamap : `~mpdaf.obj.Image`
        PCA area. Result of step02.
    testO2 : list of arrays (one per PCA area)
        Result of the O2 test (step03).
    histO2 : list of arrays (one per PCA area)
        PCA histogram (step03).
    binO2 : list of arrays (one per PCA area)
        Bins for the PCA histogram (step03).
    thresO2 : list of float
        For each area, threshold value (step03).
    meaO2 : list of float
        Location parameter of the Gaussian fit used to estimate the
        threshold (step03).
    stdO2 : list of float
        Scale parameter of the Gaussian fit used to estimate the
        threshold (step03).
    cube_faint : `~mpdaf.obj.Cube`
        Projection on the eigenvectors associated to the lower
        eigenvalues of the data cube (representing the faint signal).
        Result of step04.
    mapO2 : `~mpdaf.obj.Image`
        The numbers of iterations used by testO2 for each spaxel.
        Result of step04.
    cube_correl : `~mpdaf.obj.Cube`
        Cube of T_GLR values (step05).
    cube_profile : `~mpdaf.obj.Cube` (type int)
        PSF profile associated to the T_GLR (step05).
    maxmap : `~mpdaf.obj.Image`
        Map of maxima along the wavelength axis (step05).
    cube_local_max : `~mpdaf.obj.Cube`
        Local maxima from max correlation (step05).
    cube_local_min : `~mpdaf.obj.Cube`
        Local maxima from min correlation (step05).
    threshold : float
        Estimated threshold (step06).
    Pval : `astropy.table.Table`
        Table with the purity results for each threshold (step06):
        - PVal_r : The purity function
        - index_pval : index value to plot
        - Det_m : Number of detections (-DATA)
        - Det_M : Number of detections (+DATA)
    Cat0 : `astropy.table.Table`
        Catalog returned by step07
    Pval_comp : `astropy.table.Table`
        Table with the purity results for each threshold in compl (step08):
        - PVal_r : The purity function
        - index_pval : index value to plot
        - Det_m : Number of detections (-DATA)
        - Det_M : Number of detections (+DATA)
    Cat1 : `astropy.table.Table`
        Catalog returned by step08
    spectra : list of `~mpdaf.obj.Spectrum`
        Estimated lines. Result of step09.
    Cat2 : `astropy.table.Table`
        Catalog returned by step09.

    """

    def __init__(
        self,
        filename,
        name="origin",
        path=".",
        loglevel="DEBUG",
        logcolor=False,
        fieldmap=None,
        profiles=None,
        PSF=None,
        LBDA_FWHM_PSF=None,
        FWHM_PSF=None,
        PSF_size=25,
        param=None,
        imawhite=None,
        wfields=None,
    ):
        self.path = path
        self.name = name
        self.outpath = os.path.join(path, name)
        self.param = param or {}
        self.file_handler = None
        os.makedirs(self.outpath, exist_ok=True)

        # stdout & file logger
        setup_logging(
            name="muse_origin",
            level=loglevel,
            color=logcolor,
            fmt="%(levelname)-05s: %(message)s",
            stream=sys.stdout,
        )
        self.logger = logging.getLogger("muse_origin")
        self._setup_logfile(self.logger)
        self.param["loglevel"] = loglevel
        self.param["logcolor"] = logcolor

        self._loginfo("Step 00 - Initialization (ORIGIN v%s)", __version__)

        # dict of Step instances, indexed by step names
        self.steps = OrderedDict()
        # dict containing the data attributes of each step, to expose them on
        # the ORIGIN object
        self._dataobjs = {}
        for i, cls in enumerate(steps.STEPS, start=1):
            # Instantiate the step object, give it a step number
            step = cls(self, i, self.param)
            # force its signature to be the same as step.run (without the
            # ORIGIN instance), which allows to see its arguments and their
            # default value.
            sig = inspect.signature(step.run)
            step.__signature__ = sig.replace(
                parameters=[p for p in sig.parameters.values() if p.name != "orig"]
            )
            self.steps[step.name] = step
            # Insert the __call__ method of the step in the ORIGIN object. This
            # allows to run a step with a method like "step01_preprocessing".
            self.__dict__[step.method_name] = step
            for name, _ in step._dataobjs:
                self._dataobjs[name] = step

        # MUSE data cube
        self._loginfo("Read the Data Cube %s", filename)
        self.param["cubename"] = filename
        self.cube = Cube(filename)
        self.Nz, self.Ny, self.Nx = self.shape = self.cube.shape

        # RA-DEC coordinates
        self.wcs = self.cube.wcs
        # spectral coordinates
        self.wave = self.cube.wave

        # List of spectral profile
        if profiles is None:
            profiles = os.path.join(CURDIR, "Dico_3FWHM.fits")
        self.param["profiles"] = profiles

        # FSF
        self.param["fieldmap"] = fieldmap
        self.param["PSF_size"] = PSF_size
        self._read_fsf(
            self.cube,
            fieldmap=fieldmap,
            wfields=wfields,
            PSF=PSF,
            LBDA_FWHM_PSF=LBDA_FWHM_PSF,
            FWHM_PSF=FWHM_PSF,
            PSF_size=PSF_size,
        )

        # additional images
        self.ima_white = imawhite if imawhite else self.cube.mean(axis=0)

        # step03 result arrays, filled by the step (or by load())
        self.testO2, self.histO2, self.binO2 = None, None, None

        self._loginfo("00 Done")

    def __getattr__(self, name):
        # Use __getattr__ to provide access to the steps data attributes
        # via the ORIGIN object. This will also trigger the loading of
        # the objects if needed.
        if name in self._dataobjs:
            return getattr(self._dataobjs[name], name)
        else:
            raise AttributeError(f"unknown attribute {name}")

    def __dir__(self):
        # expose the per-step data attributes and the stepNN_* methods
        # to introspection (tab-completion)
        return (
            super().__dir__()
            + list(self._dataobjs.keys())
            + [o.method_name for o in self.steps.values()]
        )

    @lazyproperty
    def cube_raw(self):
        # Flux - set to 0 the Nan
        return self.cube.data.filled(fill_value=0)

    @lazyproperty
    def mask(self):
        # boolean mask of the (masked) data cube
        return self.cube._mask

    @lazyproperty
    def var(self):
        # variance - set to Inf the Nan
        return self.cube.var.filled(np.inf)
[docs] @classmethod def init( cls, cube, fieldmap=None, profiles=None, PSF=None, LBDA_FWHM_PSF=None, FWHM_PSF=None, PSF_size=25, name="origin", path=".", loglevel="DEBUG", logcolor=False, ): """Create a ORIGIN object. An Origin object is composed by: - cube data (raw data and covariance) - 1D dictionary of spectral profiles - MUSE PSF - parameters used to segment the cube in different zones. Parameters ---------- cube : str Cube FITS file name fieldmap : str FITS file containing the field map (mosaic) profiles : str FITS of spectral profiles If None, a default dictionary of 20 profiles is used. PSF : str Cube FITS filename containing a MUSE PSF per wavelength. If None, PSF are computed with a Moffat function (13x13 pixels, beta=2.6, fwhm1=0.76, fwhm2=0.66, lambda1=4750, lambda2=7000) LBDA_FWHM_PSF: list of float Value of the FWMH of the PSF in pixel for each wavelength step (mean of the fields). FWHM_PSF : list of float FWHM of the PSFs in pixels, one per field. PSF_size : int Spatial size of the PSF (when reconstructed from the cube header). name : str Name of this session and basename for the sources. ORIGIN.write() method saves the session in a folder that has this name. The ORIGIN.load() method will be used to load a session, continue it or create a new from it. loglevel : str Level for the logger (defaults to DEBUG). logcolor : bool Use color for the logger levels. """ return cls( cube, path=path, name=name, fieldmap=fieldmap, profiles=profiles, PSF=PSF, LBDA_FWHM_PSF=LBDA_FWHM_PSF, FWHM_PSF=FWHM_PSF, PSF_size=PSF_size, loglevel=loglevel, logcolor=logcolor, )
[docs] @classmethod @timeit def load(cls, folder, newname=None, loglevel=None, logcolor=None): """Load a previous session of ORIGIN. ORIGIN.write() method saves a session in a folder that has the name of the ORIGIN object (self.name). Parameters ---------- folder : str Folder name (with the relative path) where the ORIGIN data have been stored. newname : str New name for this session. This parameter lets the user to load a previous session but continue in a new one. If None, the user will continue the loaded session. loglevel : str Level for the logger (by default reuse the saved level). logcolor : bool Use color for the logger levels. """ path = os.path.dirname(os.path.abspath(folder)) name = os.path.basename(folder) with open(f"{folder}/{name}.yaml", "r") as stream: param = load_yaml(stream) if "FWHM PSF" in param: FWHM_PSF = np.asarray(param["FWHM PSF"]) else: FWHM_PSF = None if "LBDA_FWHM PSF" in param: LBDA_FWHM_PSF = np.asarray(param["LBDA FWHM PSF"]) else: LBDA_FWHM_PSF = None if os.path.isfile(param["PSF"]): PSF = param["PSF"] else: if os.path.isfile("%s/cube_psf.fits" % folder): PSF = "%s/cube_psf.fits" % folder else: PSF_files = glob.glob("%s/cube_psf_*.fits" % folder) if len(PSF_files) == 0: PSF = None elif len(PSF_files) == 1: PSF = PSF_files[0] else: PSF = sorted(PSF_files) wfield_files = glob.glob("%s/wfield_*.fits" % folder) if len(wfield_files) == 0: wfields = None else: wfields = sorted(wfield_files) # step0 if os.path.isfile("%s/ima_white.fits" % folder): ima_white = Image("%s/ima_white.fits" % folder) else: ima_white = None if newname is not None: # copy outpath to the new path shutil.copytree(os.path.join(path, name), os.path.join(path, newname)) name = newname loglevel = loglevel if loglevel is not None else param["loglevel"] logcolor = logcolor if logcolor is not None else param["logcolor"] obj = cls( path=path, name=name, param=param, imawhite=ima_white, loglevel=loglevel, logcolor=logcolor, filename=param["cubename"], 
fieldmap=param["fieldmap"], wfields=wfields, profiles=param["profiles"], PSF=PSF, FWHM_PSF=FWHM_PSF, LBDA_FWHM_PSF=LBDA_FWHM_PSF, ) for step in obj.steps.values(): step.load(obj.outpath) # special case for step3 NbAreas = param.get("nbareas") if NbAreas is not None: if os.path.isfile("%s/testO2_1.txt" % folder): obj.testO2 = [ np.loadtxt("%s/testO2_%d.txt" % (folder, area), ndmin=1) for area in range(1, NbAreas + 1) ] if os.path.isfile("%s/histO2_1.txt" % folder): obj.histO2 = [ np.loadtxt("%s/histO2_%d.txt" % (folder, area), ndmin=1) for area in range(1, NbAreas + 1) ] if os.path.isfile("%s/binO2_1.txt" % folder): obj.binO2 = [ np.loadtxt("%s/binO2_%d.txt" % (folder, area), ndmin=1) for area in range(1, NbAreas + 1) ] return obj
[docs] def info(self): """Prints the processing log.""" with open(self.logfile) as f: for line in f: if line.find("Done") == -1: print(line, end="")
[docs] def status(self): """Prints the processing status.""" for name, step in self.steps.items(): print(f"- {step.idx:02d}, {name}: {step.status.name}")
def _setup_logfile(self, logger): if self.file_handler is not None: # Remove the handlers before adding a new one self.file_handler.close() logger.handlers.remove(self.file_handler) self.logfile = os.path.join(self.outpath, self.name + ".log") self.file_handler = RotatingFileHandler(self.logfile, "a", 1000000, 1) self.file_handler.setLevel(logging.DEBUG) formatter = logging.Formatter("%(asctime)s %(message)s") self.file_handler.setFormatter(formatter) logger.addHandler(self.file_handler)
[docs] def set_loglevel(self, level): """Set the logging level for the console logger.""" handler = next( h for h in self.logger.handlers if isinstance(h, logging.StreamHandler) ) handler.setLevel(level) self.param["loglevel"] = level
@property def nbAreas(self): """Number of area (segmentation) for the PCA.""" return self.param.get("nbareas") @property def threshold_correl(self): """Estimated threshold used to detect lines on local maxima of max correl.""" return self.param.get("threshold") @threshold_correl.setter def threshold_correl(self, value): self.param["threshold"] = value @property def threshold_std(self): """Estimated threshold used to detect complementary lines on local maxima of std cube.""" return self.param.get("threshold_std") @threshold_std.setter def threshold_std(self, value): self.param["threshold_std"] = value @lazyproperty def profiles(self): """Read the list of spectral profiles.""" profiles = self.param["profiles"] self._loginfo("Load dictionary of spectral profile %s", profiles) with fits.open(profiles) as hdul: profiles = [hdu.data for hdu in hdul[1:]] # check that the profiles have the same size if len({p.shape[0] for p in profiles}) != 1: raise ValueError("The profiles must have the same size") return profiles @lazyproperty def FWHM_profiles(self): """Read the list of FWHM of the spectral profiles.""" with fits.open(self.param["profiles"]) as hdul: return [hdu.header["FWHM"] for hdu in hdul[1:]] def _read_fsf( self, cube, fieldmap=None, wfields=None, PSF=None, LBDA_FWHM_PSF=None, FWHM_PSF=None, PSF_size=25, ): """Read FSF cube(s), with fieldmap in the case of MUSE mosaic. There are two ways to specify the PSF informations: - with the ``PSF``, ``FWHM_PSF``, and ``LBDA_FWHM`` parameters. - or read from the cube header and fieldmap. If there are multiple fields, for a mosaic, we also need weight maps. If the cube contains a FSF model and a fieldmap is given, these weight maps are computed automatically. Parameters ---------- cube : mpdaf.obj.Cube The input datacube. fieldmap : str FITS file containing the field map (mosaic). wfields : list of str List of weight maps (one per fields in the case of MUSE mosaic). 
PSF : str or list of str Cube FITS filename containing a MUSE PSF per wavelength, or a list of filenames for multiple fields (mosaic). LBDA_FWHM_PSF: list of float Value of the FWMH of the PSF in pixel for each wavelength step (mean of the fields). FWHM_PSF : list of float FWHM of the PSFs in pixels, one per field. PSF_size : int Spatial size of the PSF (when reconstructed from the cube header). """ self.wfields = None info = self.logger.info if PSF is None or FWHM_PSF is None or LBDA_FWHM_PSF is None: info("Compute FSFs from the datacube FITS header keywords") if "FSFMODE" not in cube.primary_header: raise ValueError("missing PSF keywords in the cube FITS header") # FSF created from FSF*** keywords try: from mpdaf.MUSE import FSFModel except ImportError: sys.exit("you must upgrade MPDAF") fsf = FSFModel.read(cube) lbda = cube.wave.coord() shape = (PSF_size, PSF_size) if isinstance(fsf, FSFModel): # just one FSF self.PSF = fsf.get_3darray(lbda, shape) self.LBDA_FWHM_PSF = fsf.get_fwhm(lbda, unit="pix") self.FWHM_PSF = np.mean(self.LBDA_FWHM_PSF) # mean of the fwhm of the FSF in pixel info("mean FWHM of the FSFs = %.2f pixels", self.FWHM_PSF) else: self.PSF = [f.get_3darray(lbda, shape) for f in fsf] fwhm = np.array([f.get_fwhm(lbda, unit="pix") for f in fsf]) self.LBDA_FWHM_PSF = np.mean(fwhm, axis=0) self.FWHM_PSF = np.mean(fwhm, axis=1) for i, fwhm in enumerate(self.FWHM_PSF): info("mean FWHM of the FSFs (field %d) = %.2f pixels", i, fwhm) info("Compute weight maps from field map %s", fieldmap) fmap = FieldsMap(fieldmap, nfields=len(fsf)) # weighted field map self.wfields = fmap.compute_weights() self.param["PSF"] = cube.primary_header["FSFMODE"] else: self.LBDA_FWHM_PSF = LBDA_FWHM_PSF if isinstance(PSF, str): info("Load FSFs from %s", PSF) self.param["PSF"] = PSF self.PSF = fits.getdata(PSF) if self.PSF.shape[1] != self.PSF.shape[2]: raise ValueError("PSF must be a square image.") if not self.PSF.shape[1] % 2: raise ValueError("The spatial size of the PSF must 
be odd.") if self.PSF.shape[0] != self.shape[0]: raise ValueError( "PSF and data cube have not the same" "dimensions along the spectral axis." ) # mean of the fwhm of the FSF in pixel self.FWHM_PSF = np.mean(FWHM_PSF) self.param["FWHM PSF"] = FWHM_PSF.tolist() info("mean FWHM of the FSFs = %.2f pixels", self.FWHM_PSF) else: nfields = len(PSF) self.wfields = [] self.PSF = [] self.FWHM_PSF = list(FWHM_PSF) for n in range(nfields): info("Load FSF from %s", PSF[n]) self.PSF.append(fits.getdata(PSF[n])) info("Load weight maps from %s", wfields[n]) self.wfields.append(fits.getdata(wfields[n])) info( "mean FWHM of the FSFs (field %d) = %.2f pixels", n, FWHM_PSF[n] ) self.param["FWHM PSF"] = self.FWHM_PSF.tolist() self.param["LBDA FWHM PSF"] = self.LBDA_FWHM_PSF.tolist()
    @timeit
    def write(self, path=None, erase=False):
        """Save the current session in a folder that will have the name of
        the ORIGIN object (self.name).

        The ORIGIN.load(folder, newname=None) method will be used to load a
        session. The parameter newname will let the user to load a session
        but continue in a new one.

        Parameters
        ----------
        path : str
            Path where the folder (self.name) will be stored.
        erase : bool
            Remove the folder if it exists.

        """
        self._loginfo("Writing...")

        # adapt session if path changes
        if path is not None and path != self.path:
            if not os.path.exists(path):
                raise ValueError(f"path does not exist: {path}")
            self.path = path
            outpath = os.path.join(path, self.name)
            # copy outpath to the new path
            shutil.copytree(self.outpath, outpath)
            self.outpath = outpath
            # reopen the log file at the new location
            self._setup_logfile(self.logger)

        if erase:
            shutil.rmtree(self.outpath)
        os.makedirs(self.outpath, exist_ok=True)

        # PSF: one cube per field for a mosaic, a single cube otherwise
        if isinstance(self.PSF, list):
            for i, psf in enumerate(self.PSF):
                cube = Cube(data=psf, mask=np.ma.nomask, copy=False)
                cube.write(os.path.join(self.outpath, "cube_psf_%02d.fits" % i))
        else:
            cube = Cube(data=self.PSF, mask=np.ma.nomask, copy=False)
            cube.write(os.path.join(self.outpath, "cube_psf.fits"))

        # weight maps (mosaic only)
        if self.wfields is not None:
            for i, wfield in enumerate(self.wfields):
                im = Image(data=wfield, mask=np.ma.nomask)
                im.write(os.path.join(self.outpath, "wfield_%02d.fits" % i))

        if self.ima_white is not None:
            self.ima_white.write("%s/ima_white.fits" % self.outpath)

        # let each step persist its own data objects
        for step in self.steps.values():
            step.dump(self.outpath)

        # parameters in .yaml
        with open(f"{self.outpath}/{self.name}.yaml", "w") as stream:
            dump_yaml(self.param, stream)

        # step3 - saving this manually for now
        if self.nbAreas is not None:
            if self.testO2 is not None:
                for area in range(1, self.nbAreas + 1):
                    np.savetxt(
                        "%s/testO2_%d.txt" % (self.outpath, area), self.testO2[area - 1]
                    )
            if self.histO2 is not None:
                for area in range(1, self.nbAreas + 1):
                    np.savetxt(
                        "%s/histO2_%d.txt" % (self.outpath, area), self.histO2[area - 1]
                    )
            if self.binO2 is not None:
                for area in range(1, self.nbAreas + 1):
                    np.savetxt(
                        "%s/binO2_%d.txt" % (self.outpath, area), self.binO2[area - 1]
                    )

        self._loginfo("Current session saved in %s", self.outpath)
[docs] def plot_areas(self, ax=None, **kwargs): """ Plot the 2D segmentation for PCA from self.step02_areas() on the test used to perform this segmentation. Parameters ---------- ax : matplotlib.Axes The Axes instance in which the image is drawn. kwargs : matplotlib.artist.Artist Optional extra keyword/value arguments to be passed to ``ax.imshow()``. """ if ax is None: ax = plt.gca() kwargs.setdefault("cmap", "jet") kwargs.setdefault("alpha", 0.7) kwargs.setdefault("interpolation", "nearest") kwargs["origin"] = "lower" cax = ax.imshow(self.areamap._data, **kwargs) i0 = np.min(self.areamap._data) i1 = np.max(self.areamap._data) if i0 != i1: from matplotlib.colors import BoundaryNorm from mpl_toolkits.axes_grid1 import make_axes_locatable n = i1 - i0 + 1 bounds = np.linspace(i0, i1 + 1, n + 1) - 0.5 norm = BoundaryNorm(bounds, n + 1) divider = make_axes_locatable(ax) cax2 = divider.append_axes("right", size="5%", pad=1) plt.colorbar( cax, cax=cax2, cmap=kwargs["cmap"], norm=norm, spacing="proportional", ticks=bounds + 0.5, boundaries=bounds, format="%1i", )
    def plot_step03_PCA_threshold(
        self, log10=False, ncol=3, legend=True, xlim=None, fig=None, **fig_kw
    ):
        """Plot the histogram and the threshold for the starting point of
        the PCA.

        Parameters
        ----------
        log10 : bool
            Draw histogram in logarithmic scale or not
        ncol : int
            Number of colomns in the subplots
        legend : bool
            If true, write pfa and threshold values as legend
        xlim : (float, float)
            Set the data limits for the x-axes
        fig : matplotlib.Figure
            Figure instance in which the image is drawn
        **fig_kw : matplotlib.artist.Artist
            All additional keyword arguments are passed to the figure()
            call.

        """
        if self.nbAreas is None:
            raise ValueError("Run the step 02 to initialize self.nbAreas")

        if fig is None:
            fig = plt.figure()

        # grid layout: n rows x m columns, enough cells for nbAreas plots
        if self.nbAreas <= ncol:
            n = 1
            m = self.nbAreas
        else:
            n = self.nbAreas // ncol
            m = ncol
            if (n * m) < self.nbAreas:
                n = n + 1

        for area in range(1, self.nbAreas + 1):
            if area == 1:
                ax = fig.add_subplot(n, m, area, **fig_kw)
            else:
                # share the y axis with the first subplot
                ax = fig.add_subplot(n, m, area, sharey=fig.axes[0], **fig_kw)
            self.plot_PCA_threshold(area, "step03", log10, legend, xlim, ax)

        # Fine-tune figure
        for a in fig.axes[:-1]:
            a.set_xlabel("")
        for a in fig.axes[1:]:
            a.set_ylabel("")
        # hide y ticks everywhere, then re-show them on the first column
        plt.setp([a.get_yticklabels() for a in fig.axes], visible=False)
        plt.setp([a.get_yticklabels() for a in fig.axes[0::m]], visible=True)
        plt.setp([a.get_yticklines() for a in fig.axes], visible=False)
        plt.setp([a.get_yticklines() for a in fig.axes[0::m]], visible=True)
        fig.subplots_adjust(wspace=0)
        if xlim is not None:
            # with a common x range, only the last row keeps its x ticks
            plt.setp([a.get_xticklabels() for a in fig.axes[:-m]], visible=False)
            plt.setp([a.get_xticklines() for a in fig.axes[:-m]], visible=False)
            fig.subplots_adjust(hspace=0)
[docs] def plot_step03_PCA_stat(self, cutoff=5, ax=None): """Plot the threshold value according to the area. Median Absolute Deviation is used to find outliers. Parameters ---------- cutoff : float Median Absolute Deviation cutoff ax : matplotlib.Axes The Axes instance in which the image is drawn """ if self.nbAreas is None: raise ValueError("Run the step 02 to initialize self.nbAreas") if self.thresO2 is None: raise ValueError("Run the step 03 to compute the threshold values") if ax is None: ax = plt.gca() ax.plot(np.arange(1, self.nbAreas + 1), self.thresO2, "+") med = np.median(self.thresO2) diff = np.absolute(self.thresO2 - med) mad = np.median(diff) if mad != 0: ksel = (diff / mad) > cutoff if ksel.any(): ax.plot( np.arange(1, self.nbAreas + 1)[ksel], np.asarray(self.thresO2)[ksel], "ro", ) ax.set_xlabel("area") ax.set_ylabel("Threshold") ax.set_title(f"PCA threshold (med={med:.2f}, mad= {mad:.2f})")
    def plot_PCA_threshold(
        self, area, pfa_test="step03", log10=False, legend=True, xlim=None, ax=None
    ):
        """Plot the histogram and the threshold for the starting point of
        the PCA.

        Parameters
        ----------
        area : int in [1, nbAreas]
            Area ID
        pfa_test : float or str
            PFA of the test (if 'step03', the value set during step03 is
            used)
        log10 : bool
            Draw histogram in logarithmic scale or not
        legend : bool
            If true, write pfa and threshold values as legend
        xlim : (float, float)
            Set the data limits for the x-axis
        ax : matplotlib.Axes
            Axes instance in which the image is drawn

        """
        if self.nbAreas is None:
            raise ValueError("Run the step 02 to initialize self.nbAreas")

        if pfa_test == "step03":
            # reuse the histogram/threshold computed and stored by step03
            param = self.param["compute_PCA_threshold"]["params"]
            if "pfa_test" in param:
                pfa_test = param["pfa_test"]
                hist = self.histO2[area - 1]
                bins = self.binO2[area - 1]
                thre = self.thresO2[area - 1]
                mea = self.meaO2[area - 1]
                std = self.stdO2[area - 1]
            else:
                raise ValueError(
                    "pfa_test param is None: set a value or run the Step03"
                )
        else:
            # recompute the threshold for the requested pfa on the fly
            if self.cube_std is None:
                raise ValueError("Run the step 01 to initialize self.cube_std")
            # limits of each spatial zone
            ksel = self.areamap._data == area
            # Data in this spatio-spectral zone
            cube_temp = self.cube_std._data[:, ksel]
            # Compute_PCA_threshold
            from .lib_origin import Compute_PCA_threshold

            testO2, hist, bins, thre, mea, std = Compute_PCA_threshold(
                cube_temp, pfa_test
            )

        if ax is None:
            ax = plt.gca()

        from scipy import stats

        # Gaussian fit of the histogram, rescaled to the histogram peak
        center = (bins[:-1] + bins[1:]) / 2
        gauss = stats.norm.pdf(center, loc=mea, scale=std)
        gauss *= hist.max() / gauss.max()

        if log10:
            # log10 of zero-count bins emits warnings; silence them
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                gauss = np.log10(gauss)
                hist = np.log10(hist)

        ax.plot(center, hist, "-k")
        ax.plot(center, hist, ".r")
        ax.plot(center, gauss, "-b", alpha=0.5)
        ax.axvline(thre, color="b", lw=2, alpha=0.5)
        ax.grid()
        if xlim is None:
            ax.set_xlim((center.min(), center.max()))
        else:
            ax.set_xlim(xlim)
        # NOTE(review): these labels look swapped for a histogram (x holds
        # the tested values, y the frequencies) — kept as-is, confirm.
        ax.set_xlabel("frequency")
        ax.set_ylabel("value")

        kwargs = dict(transform=ax.transAxes, bbox=dict(facecolor="red", alpha=0.5))
        if legend:
            text = "zone %d\npfa %.2f\nthreshold %.2f" % (area, pfa_test, thre)
            ax.text(0.1, 0.8, text, **kwargs)
        else:
            ax.text(0.9, 0.9, "%d" % area, **kwargs)
[docs] def plot_mapPCA(self, area=None, iteration=None, ax=None, **kwargs): """ Plot at a given iteration (or at the end) the number of times a spaxel got cleaned by the PCA. Parameters ---------- area: int in [1, nbAreas] if None draw the full map for all areas iteration : int Display the nuisance/bacground pixels at iteration k ax : matplotlib.Axes The Axes instance in which the image is drawn kwargs : matplotlib.artist.Artist Optional extra keyword/value arguments to be passed to ``ax.imshow()``. """ if self.mapO2 is None: raise ValueError("Run the step 04 to initialize self.mapO2") themap = self.mapO2.copy() title = "Number of times the spaxel got cleaned by the PCA" if iteration is not None: title += "\n%d iterations" % iteration if area is not None: mask = np.ones_like(self.mapO2._data, dtype=np.bool) mask[self.areamap._data == area] = False themap._mask = mask title += " (zone %d)" % area if iteration is not None: themap[themap._data < iteration] = np.ma.masked if ax is None: ax = plt.gca() kwargs.setdefault("cmap", "jet") themap.plot(title=title, colorbar="v", ax=ax, **kwargs)
[docs] def plot_purity(self, comp=False, ax=None, log10=False, legend=True): """Draw number of sources per threshold computed in step06/step08. Parameters ---------- comp : bool If True, plot purity curves for the complementary lines (step08). ax : matplotlib.Axes The Axes instance in which the image is drawn. log10 : bool To draw histogram in logarithmic scale or not. legend : bool To draw the legend. """ if ax is None: ax = plt.gca() if comp: threshold = self.threshold_std purity = self.param["purity_std"] Pval = self.Pval_comp else: threshold = self.threshold_correl purity = self.param["purity"] Pval = self.Pval if Pval is None: raise ValueError("Run the step 06") Tval_r = Pval["Tval_r"] ax2 = ax.twinx() ax2.plot(Tval_r, Pval["Pval_r"], "y.-", label="purity") ax.plot(Tval_r, Pval["Det_M"], "b.-", label="n detections (+DATA)") ax.plot(Tval_r, Pval["Det_m"], "g.-", label="n detections (-DATA)") ax2.plot(threshold, purity, "xr") if log10: ax.set_yscale("log") ax2.set_yscale("log") ym, yM = ax.get_ylim() ax.plot( [threshold, threshold], [ym, yM], "r", alpha=0.25, lw=2, label="automatic threshold", ) ax.set_ylim((ym, yM)) ax.set_xlabel("Threshold") ax2.set_ylabel("Purity") ax.set_ylabel("Number of detections") ax.set_title("threshold %f" % threshold) h1, l1 = ax.get_legend_handles_labels() h2, l2 = ax2.get_legend_handles_labels() if legend: ax.legend(h1 + h2, l1 + l2, loc=2)
    def plot_NB(self, src_ind, ax1=None, ax2=None, ax3=None):
        """Plot the narrow band images.

        Parameters
        ----------
        src_ind : int
            Index of the object in self.Cat0.
        ax1 : matplotlib.Axes
            The Axes instance in which the NB image around the source is
            drawn.
        ax2 : matplotlib.Axes
            The Axes instance in which a other NB image for check is drawn.
        ax3 : matplotlib.Axes
            The Axes instance in which the difference is drawn.

        """
        if self.Cat0 is None:
            raise ValueError("Run the step 05 to initialize self.Cat0")

        if ax1 is None and ax2 is None and ax3 is None:
            fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 4))

        # Coordinates of the source
        x0 = self.Cat0[src_ind]["x0"]
        y0 = self.Cat0[src_ind]["y0"]
        z0 = self.Cat0[src_ind]["z0"]
        # Larger spatial ranges for the plots
        longxy0 = 20
        y01 = max(0, y0 - longxy0)
        y02 = min(self.shape[1], y0 + longxy0 + 1)
        x01 = max(0, x0 - longxy0)
        x02 = min(self.shape[2], x0 + longxy0 + 1)
        # Coordinates in this window
        y00 = y0 - y01
        x00 = x0 - x01
        # spectral profile
        num_prof = self.Cat0[src_ind]["profile"]
        profil0 = self.profiles[num_prof]
        # length of the spectral profile
        profil1 = profil0[profil0 > 1e-13]
        long0 = profil1.shape[0]
        # half-length of the spectral profile
        longz = long0 // 2
        # spectral range
        intz1 = max(0, z0 - longz)
        intz2 = min(self.shape[0], z0 + longz + 1)
        # subcube for the plot
        cube_test_plot = self.cube_raw[intz1:intz2, y01:y02, x01:x02]
        wcs = self.wcs[y01:y02, x01:x02]
        # controle cube: same spatial window, shifted a few profile
        # lengths away in wavelength (backwards if too close to the edge)
        nb_ranges = 3
        if (z0 + longz + nb_ranges * long0) < self.shape[0]:
            intz1c = intz1 + nb_ranges * long0
            intz2c = intz2 + nb_ranges * long0
        else:
            intz1c = intz1 - nb_ranges * long0
            intz2c = intz2 - nb_ranges * long0
        cube_controle_plot = self.cube_raw[intz1c:intz2c, y01:y02, x01:x02]
        # (1/sqrt(2)) * difference of the 2 sububes
        diff_cube_plot = (1 / np.sqrt(2)) * (cube_test_plot - cube_controle_plot)

        if ax1 is not None:
            ax1.plot(x00, y00, "m+")
            ima_test_plot = Image(data=cube_test_plot.sum(axis=0), wcs=wcs)
            title = "cube test - (%d,%d)\n" % (x0, y0)
            title += "lambda=%d int=[%d,%d[" % (z0, intz1, intz2)
            ima_test_plot.plot(colorbar="v", title=title, ax=ax1)
            ax1.get_xaxis().set_visible(False)
            ax1.get_yaxis().set_visible(False)

        if ax2 is not None:
            ax2.plot(x00, y00, "m+")
            ima_controle_plot = Image(data=cube_controle_plot.sum(axis=0), wcs=wcs)
            title = "check - (%d,%d)\n" % (x0, y0) + "int=[%d,%d[" % (intz1c, intz2c)
            ima_controle_plot.plot(colorbar="v", title=title, ax=ax2)
            ax2.get_xaxis().set_visible(False)
            ax2.get_yaxis().set_visible(False)

        if ax3 is not None:
            ax3.plot(x00, y00, "m+")
            ima_diff_plot = Image(data=diff_cube_plot.sum(axis=0), wcs=wcs)
            title = "Difference narrow band - (%d,%d)\n" % (x0, y0) + "int=[%d,%d[" % (
                intz1c,
                intz2c,
            )
            ima_diff_plot.plot(colorbar="v", title=title, ax=ax3)
            ax3.get_xaxis().set_visible(False)
            ax3.get_yaxis().set_visible(False)
[docs] def plot_sources( self, x, y, circle=False, vmin=0, vmax=30, title=None, ax=None, **kwargs ): """Plot detected emission lines on the 2D map of maximum of the T_GLR values over the spectral channels. Parameters ---------- x : array Coordinates along the x-axis of the estimated lines in pixels. y : array Coordinates along the y-axis of the estimated lines in pixels. circle : bool If true, plot circles with a diameter equal to the mean of the fwhm of the PSF. vmin : float Minimum pixel value to use for the scaling. vmax : float Maximum pixel value to use for the scaling. title : str An optional title for the figure (None by default). ax : matplotlib.Axes the Axes instance in which the image is drawn kwargs : matplotlib.artist.Artist Optional arguments passed to ``ax.imshow()``. """ if ax is None: ax = plt.gca() self.maxmap.plot(vmin=vmin, vmax=vmax, title=title, ax=ax, **kwargs) if circle: fwhm = ( self.FWHM_PSF if self.wfields is None else np.max(np.array(self.FWHM_PSF)) ) radius = np.round(fwhm / 2) for pos in zip(x, y): ax.add_artist(plt.Circle(pos, radius, color="k", fill=False)) else: ax.plot(x, y, "k+")
def plot_segmaps(self, axes=None, figsize=(6, 6)):
    """Plot the segmentation maps:

    - segmap_cont: segmentation map computed on the white-light image.
    - segmap_merged: segmentation map merged with the cont one and
      another one computed on the residual.
    - segmap_purity: combines self.segmap and a segmentation on the
      maxmap.
    - segmap_label: segmentation map used for the catalog, either the
      one given as input, otherwise self.segmap_cont.

    """
    # Collect the segmentation maps that are actually available.
    segmaps = {}
    for name in ("segmap_cont", "segmap_merged", "segmap_purity", "segmap_label"):
        segm = getattr(self, name, None)
        if segm:
            segmaps[name] = segm

    nseg = len(segmaps)
    if nseg == 0:
        self.logger.warning("nothing to plot")
        return

    # One color per distinct label across all maps.
    ncolors = max(len(np.unique(im._data)) for im in segmaps.values())

    try:
        # TODO: this will be renamed to make_random_cmap in a future
        # version of photutils
        from photutils.utils.colormaps import random_cmap
    except ImportError:
        self.logger.error("photutils is needed for this")
        cmap = "jet"
    else:
        cmap = random_cmap(ncolors=ncolors)
        # Label 0 (background) is drawn in black.
        cmap.colors[0] = (0.0, 0.0, 0.0)

    if axes is None:
        figsize = (figsize[0] * nseg, figsize[1])
        fig, axes = plt.subplots(1, nseg, sharex=True, sharey=True, figsize=figsize)
        if nseg == 1:
            axes = [axes]

    for ax, (name, im) in zip(axes, segmaps.items()):
        im.plot(ax=ax, cmap=cmap, title=name, colorbar="v")
def plot_min_max_hist(self, ax=None, comp=False):
    """Plot the histograms of local maxima and minima."""
    # Use the standardized-cube extrema when comp is True, otherwise the
    # faint-cube (post-PCA) extrema.
    if comp:
        locmax = self.cube_std_local_max._data
        locmin = self.cube_std_local_min._data
    else:
        locmax = self.cube_local_max._data
        locmin = self.cube_local_min._data

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(12, 6))

    ax.set_yscale("log")
    ax.grid(which="major", linewidth=1)
    ax.grid(which="minor", linewidth=1, linestyle=":")

    # Shared settings: reversed cumulative step histograms.
    histkw = dict(histtype="step", linewidth=2, cumulative=-1)

    # Positive local maxima, binned in half-unit steps.
    pos_max = locmax[locmax > 0]
    bins = np.arange((pos_max.max() + 1) * 2) / 2
    ax.hist(pos_max, bins=bins, label="max", **histkw)

    # Positive local minima, with their own half-unit bins.
    pos_min = locmin[locmin > 0]
    bins = np.arange((pos_min.max() + 1) * 2) / 2
    ax.hist(pos_min, bins=bins, label="min", **histkw)

    # Minima restricted to the background (segmap_purity == 0), drawn
    # with the same bins as the unfiltered minima.
    filt_min = locmin[:, self.segmap_purity._data == 0]
    filt_min = filt_min[filt_min > 0]
    ax.hist(filt_min, bins=bins, label="min filt", **histkw)

    ax.legend()
    ax.set_title("Cumulative histogram of min/max loc")
def timestat(self, table=False):
    """Print CPU usage by steps.

    Parameters
    ----------
    table : bool
        If True, return an `astropy.table.Table` with one row per
        executed step plus a final "Total" row, instead of logging.

    Returns
    -------
    `astropy.table.Table` or None
        The timing table when ``table`` is True, otherwise None.
    """
    # Only steps that have actually run carry an execution date; the
    # others are silently skipped.
    executed = [s for s in self.steps.values() if "execution_date" in s.meta]

    if table:
        names, exdates, extimes = [], [], []
        tot = 0
        for step in executed:
            names.append(step.method_name)
            exdates.append(step.meta["execution_date"])
            t = step.meta["runtime"]
            tot += t
            # Stringify so the column has a homogeneous type (the
            # original code mixed timedelta rows with a str total).
            extimes.append(str(datetime.timedelta(seconds=t)))
        names.append("Total")
        exdates.append("")
        extimes.append(str(datetime.timedelta(seconds=tot)))
        return Table(
            data=[names, exdates, extimes],
            names=["Step", "Exec Date", "Exec Time"],
            masked=True,
        )
    else:
        tot = 0
        for step in executed:
            t = step.meta["runtime"]
            tot += t
            self.logger.info(
                "%s executed: %s run time: %s",
                step.method_name,
                step.meta["execution_date"],
                str(datetime.timedelta(seconds=t)),
            )
        self.logger.info(
            "*** Total run time: %s", str(datetime.timedelta(seconds=tot))
        )
def stat(self):
    """Print detection summary."""
    # All numbers come from the statistics dict built by _get_stat.
    d = self._get_stat()
    log = self.logger.info
    log(
        "ORIGIN PCA pfa %.2f Back Purity: %.2f "
        "Threshold: %.2f Bright Purity %.2f Threshold %.2f",
        d["pca"],
        d["back_purity"],
        d["back_threshold"],
        d["bright_purity"],
        d["bright_threshold"],
    )
    log("Nb of detected lines: %d", d["tot_nlines"])
    log(
        "Nb of sources Total: %d Background: %d Cont: %d",
        d["tot_nsources"],
        d["back_nsources"],
        d["cont_nsources"],
    )
    log(
        "Nb of sources detected in faint (after PCA): %d "
        "in std (before PCA): %d",
        d["faint_nsources"],
        d["bright_nsources"],
    )
def _get_stat(self):
    """Gather the detection statistics reported by ``stat``.

    Returns None when the final source catalog (Cat3_sources) is not
    available yet.
    """
    cat = self.Cat3_sources
    if not cat:
        return None

    params = self.param
    pca_params = params["compute_PCA_threshold"]["params"]

    # Partition the catalog by segmentation label and detection origin.
    n_back = len(cat[cat["seg_label"] == 0])
    n_cont = len(cat[cat["seg_label"] > 0])
    n_bright = len(cat[cat["comp"] == 1])
    n_faint = len(cat[cat["comp"] == 0])

    return dict(
        pca=pca_params["pfa_test"],
        back_purity=params["purity"],
        back_threshold=params["threshold"],
        bright_purity=params["purity_std"],
        bright_threshold=params["threshold_std"],
        tot_nlines=len(self.Cat3_lines),
        tot_nsources=len(cat),
        back_nsources=n_back,
        cont_nsources=n_cont,
        faint_nsources=n_faint,
        bright_nsources=n_bright,
    )