Source code for libvhc.reference_file_parser

# Virus Health Check: a validation tool for HETDEX/VIRUS data
# Copyright (C) 2015, 2016, 2017, 2018  "The HETDEX collaboration"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""The module provides the tools to parse and retrieve information from
reference files with the following structure ::

    # id channel amplifier some_number [some_number2 [..]]
    001       L       L         1      [    1        [..]]
    001       L       U         1      [    1        [..]]
    001       R       L         1      [    1        [..]]
    001       R       U         1      [    1        [..]]
    ...

It is also possible to use wild cards similarly to::

    # id channel amplifier some_number [some_number2 [..]]
     *        *       *         1      [    1        [..]]
    001       *       U         1      [    1        [..]]
    001       R       *         1      [    1        [..]]
     *        R       U         1      [    1        [..]]

When wild cards are used, the keys are resolved in the order described in
:meth:`~_BaseParser._key_wildcards`.

.. versionadded:: 0.3.0

Each reference file is represented by a class derived from
:class:`_BaseParser`. The method :meth:`_BaseParser.filename` returns the name
of the file to parse and must be overridden in every derived class. Each class
implementing a parser can be registered by advertising it in the
``vhc.file_parsers`` entry point group:

.. code-block:: python

    # in mylib/parser.py
    class _AReferenceParser(_BaseParser):
        def filename(self):
            return "/path/to/reference/file.dat"

    # in setup.py
    entry_points = {'vhc.file_parsers':
                    ['reference_parser = mylib.parser:_AReferenceParser']
                    }

See :class:`_BaseParser` for a list and description of all the available
methods.

The registered classes are then initialised by :func:`init`.

After initialization, the instances are accessible via the :func:`picker`
function. Then one can call the :meth:`~_BaseParser.get_value` method.

.. code-block:: python

    parser = picker("reference_parser")
    parser.get_value(4, "L", "L")
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import abc
import itertools as it
import os

import six

import libvhc.exceptions as vhcexcept

# dictionary containing the registered classes and, after initialization,
# instances
_registered = {}
_instances = {}


def init(conf, log):
    """Instantiate every registered parser class.

    Each class found in the module-level ``_registered`` dictionary is called
    as ``cls(conf, log)``, i.e. with the signature of :class:`_BaseParser`,
    and the resulting instance is stored in ``_instances`` under the same
    name. If the instantiation raises an exception, the failure is logged
    together with the traceback and that parser is skipped.

    Parameters
    ----------
    conf : :class:`pyhetdex.tools.configuration.ConfigParser` instance
        configuration object: should contain any info necessary to use the
        instances
    log : :class:`logging.LoggerAdapter` or :class:`logging.Logger` instance
        logger
    """
    for name, parser_class in _registered.items():
        try:
            instance = parser_class(conf, log)
        except Exception:
            # one broken parser must not prevent the others from loading
            log.exception("Failed to initialize the parser '%s'", name)
        else:
            _instances[name] = instance
def picker(name):
    """Return the parser instance stored under ``name``.

    The instances are created by :func:`init`, so this function should be
    called only afterwards.

    Parameters
    ----------
    name : string
        name under which the instance has been registered

    Returns
    -------
    :class:`_BaseParser` child instance

    Raises
    ------
    KeyError
        if no instance is stored under ``name``
    """
    instance = _instances[name]
    return instance
# parsers implementations
@six.add_metaclass(abc.ABCMeta)
class _BaseParser(object):
    """Base class for the parsers.

    All derived classes must override :meth:`filename`. The reference file is
    parsed when the instance is created.

    Parameters
    ----------
    conf : :class:`pyhetdex.tools.configuration.ConfigParser` instance
        configuration object: should contain any info necessary to use the
        instances
    log : :class:`logging.LoggerAdapter` or :class:`logging.Logger` instance
        logger

    Attributes
    ----------
    conf, log : as above
    """

    def __init__(self, conf, log):
        self.conf = conf
        self.log = log
        # content of the reference file: maps the keys built by ``_key`` to
        # the unparsed remainder of the corresponding line
        self._parsed = {}
        self._parse_file()

    def _key(self, id_, channel, amplifier):
        """Build the dictionary key for one id/channel/amplifier combination.

        Parameters
        ----------
        id_, channel, amplifier : string
            they should match any of the first columns in the file

        Returns
        -------
        string
            dictionary key, of the form ``<id_>_<channel>_<amplifier>``
        """
        return "{id_}_{ch}_{amp}".format(id_=id_, ch=channel, amp=amplifier)

    def _key_wildcards(self, id_, channel, amplifier):
        """Return the first key of the list below that exists in the parsed
        file::

            id_ channel amplifier
            id_ channel    *
            id_    *    amplifier
            id_    *       *
             *  channel amplifier
             *  channel    *
             *     *    amplifier
             *     *       *

        If none is matched, the first one is returned.

        Parameters
        ----------
        id_, channel, amplifier : string
            they should match any of the first columns in the file

        Returns
        -------
        string
            dictionary key
        """
        # ``product`` varies the last element fastest, which yields exactly
        # the resolution order documented above
        candidates = it.product([id_, '*'], [channel, '*'], [amplifier, '*'])
        for i, c, a in candidates:
            key = self._key(i, c, a)
            if key in self._parsed:
                return key
        return self._key(id_, channel, amplifier)

    def get_value(self, id_, channel, amplifier):
        """Return the value(s) for the ``id_`` fiber bundle or spectrograph
        (or whatever), for the given ``channel``/``amplifier`` pair.

        The key is resolved with :meth:`_key_wildcards` and, before returning,
        the stored string is passed through :meth:`parse_value`.

        This is the only method that should be used.

        Parameters
        ----------
        id_, channel, amplifier : string
            they should match any of the first columns in the file

        Returns
        -------
        whatever :meth:`parse_value` returns

        Raises
        ------
        VHCReferenceKeyError
            if the combination of id, channel and amplifier does not exist in
            the reference file, not even through wild cards
        """
        key = self._key_wildcards(id_, channel, amplifier)
        try:
            value = self._parsed[key]
        except KeyError as e:
            msg = ("The required combination of id: {}, channel: {} and"
                   " amplifier: {} is not present in the reference file '{}'")
            msg = msg.format(id_, channel, amplifier, self.filename())
            six.raise_from(vhcexcept.VHCReferenceKeyError(msg), e)
        return self.parse_value(value)

    @abc.abstractmethod
    def filename(self):  # pragma: no cover
        """Abstract method: name of the reference file.

        Either hard-coded or from the configuration object.

        Returns
        -------
        filename : string
        """
        return ""

    def _parse_file(self):
        """Parse the reference file and save its content into a dictionary.

        Blank lines and lines whose first non-blank character is ``#`` are
        skipped. For every other line the key is built by :meth:`_key` from
        the first three columns and the value is the rest of the line, from
        the 4th column on, stripped of surrounding white spaces.

        Raises
        ------
        ValueError
            if a non-comment line has fewer than four columns
        """
        fname = self.filename()
        with open(fname) as f:
            for line_no, raw_line in enumerate(f, 1):
                # strip before testing, so that indented comments are
                # recognised instead of being parsed as data
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                try:
                    _id, _ch, _amp, rest = line.split(None, 3)
                except ValueError as e:
                    msg = ("Line {} of the reference file '{}' has fewer than"
                           " four columns: '{}'")
                    msg = msg.format(line_no, fname, line)
                    six.raise_from(ValueError(msg), e)
                self._parsed[self._key(_id, _ch, _amp)] = rest.strip()
        self.log.info("'%s' reference file parsed", fname)

    def parse_value(self, str_value):
        """Parse the input string.

        The return value of :meth:`get_value` is passed through this function.
        This implementation just returns the input value.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        string
            the input string, unchanged
        """
        return str_value

    def expand_file_name(self, fname):
        """If ``fname`` is not absolute, expand it relative to the directory
        containing the reference file returned by :meth:`filename`.

        Parameters
        ----------
        fname : string
            file name to expand

        Returns
        -------
        string
            ``fname`` itself if absolute, otherwise ``fname`` joined to the
            directory of the reference file
        """
        if os.path.isabs(fname):
            return fname
        ref_path = os.path.dirname(self.filename())
        return os.path.join(ref_path, fname)
class _Overscan(_BaseParser):
    """Parser of the reference file used by the overscan checks."""

    def filename(self):
        """Return the name of the overscan reference file.

        The name is the value of the ``overscan_reference`` option of the
        ``[common]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("common", "overscan_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Convert the white space separated columns into floats.

        The return value of :meth:`~_BaseParser.get_value` is passed through
        this function.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        list of floats
            one entry per column; the reference file is expected to provide
            ``mean, stddev, max_ovc_dev_mean, max_ovc_dev_rms``, where the
            first two are the expected mean and standard deviation of the
            overscan
        """
        return list(map(float, str_value.split()))
class _Bias(_Overscan):
    """Parser of the reference file used by the bias comparison checks.

    The values are parsed as in :class:`_Overscan`.
    """

    def filename(self):
        """Return the name of the bias reference file.

        The name is the value of the ``bias_reference`` option of the
        ``[bias]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("bias", "bias_reference")
        return os.path.expanduser(reference)
class _NFibers(_BaseParser):
    """Parser of the reference file used by the n_fibers checks."""

    def filename(self):
        """Return the name of the n_fibers reference file.

        The name is the value of the ``nfibers_reference`` option of the
        ``[flat]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("flat", "nfibers_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Convert the remaining columns of the n_fibers reference file into
        integers.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        list of ints
            one entry per column; the reference file is expected to provide
            ``n_fibers, tolerance``: expected number of fibers and maximum
            error
        """
        return list(map(int, str_value.split()))
class _MinFlux(_BaseParser):
    """Parser of the reference file used by the min_flux checks."""

    def filename(self):
        """Return the name of the min_flux reference file.

        The name is the value of the ``min_flux_reference`` option of the
        ``[flat]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("flat", "min_flux_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Convert the remaining column of the min_flux reference file into a
        float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        min_flux : float
            minimum allowed flux
        """
        min_flux = float(str_value)
        return min_flux
class _Saturation(_BaseParser):
    """Parser of the reference file used by the saturation checks."""

    def filename(self):
        """Return the name of the saturation reference file.

        The name is the value of the ``saturation_reference`` option of the
        ``[common]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("common", "saturation_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Parse the two remaining columns of the saturation reference file.

        Parameters
        ----------
        str_value : string
            string to be parsed; must contain exactly two white space
            separated columns

        Returns
        -------
        sat : float
            maximum allowed flux
        max_n : int
            second column, converted to integer
            (NOTE(review): presumably the maximum number of saturated
            pixels; confirm against the check using it)
        """
        fields = str_value.split()
        sat, max_n = fields
        saturation = float(sat)
        return saturation, int(max_n)
class _Distortion(_BaseParser):
    """Parser of the file listing the reference distortion files."""

    def filename(self):
        """Return the name of the distortion reference file.

        The name is the value of the ``distortion_reference`` option of the
        ``[common]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("common", "distortion_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Extract the name of the reference distortion file.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        dist_file : string
            name of the distortion file, expanded with
            :meth:`~_BaseParser.expand_file_name`
        """
        dist_file = str_value.strip()
        return self.expand_file_name(dist_file)
class _Arcs(_BaseParser):
    """Parser of the file listing the reference line list files and the
    maximum deviation of the arc peaks from the expected position.
    """

    def filename(self):
        """Return the name of the arcs reference file.

        The name is the value of the ``arcs_reference`` option of the
        ``[arc]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("arc", "arcs_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Extract the name of the reference line list file and the
        tolerance, in pixels, on the position of the peaks.

        Parameters
        ----------
        str_value : string
            string to be parsed; must contain exactly two white space
            separated columns

        Returns
        -------
        line_list : string
            name of the line list file, expanded with
            :meth:`~_BaseParser.expand_file_name`
        peak_tolerance : int
            maximum tolerance for arc peak shift
        """
        line_list, peak_tolerance = str_value.split()
        full_name = self.expand_file_name(line_list)
        return full_name, int(peak_tolerance)
class _Dettemp(_BaseParser):
    """Parser of the file containing the maximum detector temperature and the
    maximum allowed difference between the commanded and the actual
    temperature.
    """

    def filename(self):
        """Return the name of the detector temperature reference file.

        The name is the value of the ``dettemp_reference`` option of the
        ``[common]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("common", "dettemp_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Convert the white space separated columns into floats.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        list of floats
            one entry per column; the reference file is expected to provide
            the temperature and the delta temperature
        """
        return list(map(float, str_value.split()))
class _Nullpixel(_BaseParser):
    """Parser of the file containing the maximum number of null pixels."""

    def filename(self):
        """Return the name of the null pixel reference file.

        The name is the value of the ``nullpixel_reference`` option of the
        ``[common]`` configuration section, with a leading ``~`` expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("common", "nullpixel_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Convert the remaining column of the reference file into a float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        float
            value of the column; according to the class description it is
            the maximum number of null pixels
        """
        max_null = float(str_value)
        return max_null
class _nHist(_BaseParser):
    """Parser of the reference file used by the row_cte checks."""

    def filename(self):
        """Return the name of the row_cte reference file.

        The name is the value of the ``row_cte_reference`` option of the
        ``[hetdex_dithers]`` configuration section, with a leading ``~``
        expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("hetdex_dithers", "row_cte_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Convert the remaining column of the row_cte reference file into a
        float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        n_hist : float
            histogram selection parameter
        """
        n_hist = float(str_value)
        return n_hist
class _SkyLevel(_BaseParser):
    """Parser of the reference file used by the sky level check."""

    def filename(self):
        """Return the name of the sky level reference file.

        The name is the value of the ``sky_level_reference`` option of the
        ``[hetdex_dithers]`` configuration section, with a leading ``~``
        expanded.

        Returns
        -------
        filename : string
        """
        reference = self.conf.get("hetdex_dithers", "sky_level_reference")
        return os.path.expanduser(reference)

    def parse_value(self, str_value):
        """Convert the remaining column of the sky_level reference file into
        a float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        sky_level : float
            counts over the median overscan value
        """
        sky_level = float(str_value)
        return sky_level