# Virus Health Check: a validation tool for HETDEX/VIRUS data
# Copyright (C) 2015, 2016, 2017, 2018 "The HETDEX collaboration"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""The module provides the tools to parse and retrieve information from
reference files with the following structure ::
# id channel amplifier some_number [some_number2 [..]]
001 L L 1 [ 1 [..]]
001 L U 1 [ 1 [..]]
001 R L 1 [ 1 [..]]
001 R U 1 [ 1 [..]]
...
It is also possible to use wild cards similarly to::
# id channel amplifier some_number [some_number2 [..]]
* * * 1 [ 1 [..]]
001 * U 1 [ 1 [..]]
001 R * 1 [ 1 [..]]
* R U 1 [ 1 [..]]
When using wild cards, the resolution order highlighted in
:meth:`~_BaseParser._key_wildcards` is used.
.. versionadded:: 0.3.0
Each reference file is represented by a class derived from
:class:`_BaseParser`. The method :meth:`_BaseParser.filename` returns the name
of the file to parse and must be overridden in every derived class. Each class
implementing a parser can be registered by advertising it in the
``vhc.file_parsers`` entry point group:
.. code-block:: python
# in mylib/parser.py
class _AReferenceParser(_BaseParser):
def filename(self):
return "/path/to/reference/file.dat"
# in setup.py
entry_points = {'vhc.file_parsers':
['reference_parser = mylib.parser:_AReferenceParser']
}
See :class:`_BaseParser` for a list and description of all the available
methods.
The registered classes are then initialised by :func:`init`.
After initialization, the instances are accessible via the :func:`picker`
function. One can then call the :meth:`~_BaseParser.get_value` method.
.. code-block:: python
parser = picker("reference_parser")
parser.get_value(4, "L", "L")
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import abc
import itertools as it
import os
import six
import libvhc.exceptions as vhcexcept
# Parser classes, keyed by the name they are registered under. Nothing in the
# visible part of this module fills this dictionary -- presumably it is
# populated by the entry point loading code; TODO confirm.
_registered = {}
# Parser instances created by :func:`init`, stored under the same names as in
# ``_registered``; this is the dictionary that :func:`picker` looks up.
_instances = {}
def init(conf, log):
    """Create one instance of every registered parser class.

    Each class found in ``_registered`` is called as ``cls(conf, log)``, i.e.
    with the signature of :class:`_BaseParser`, and the resulting instance is
    stored in ``_instances`` under the same name. A parser that fails to
    initialise does not stop the others: the failure is logged, with the
    traceback, and the loop moves on.

    Parameters
    ----------
    conf : :class:`pyhetdex.tools.configuration.ConfigParser` instance
        configuration object: should contain any info necessary to use the
        instances
    log : :class:`logging.LoggerAdapter` or :class:`logging.Logger` instance
        logger
    """
    for name, parser_class in _registered.items():
        try:
            instance = parser_class(conf, log)
        except Exception:
            # best effort: report the broken parser and keep going
            log.exception("Failed to initialize the parser '%s'", name)
        else:
            _instances[name] = instance
def picker(name):
    """Return the parser instance stored under ``name``.

    Instances are created by :func:`init`, so this function is meant to be
    called after it.

    Parameters
    ----------
    name : string
        name under which the parser has been registered

    Returns
    -------
    :class:`_BaseParser` child instance

    Raises
    ------
    KeyError
        if no instance is stored under ``name``
    """
    instance = _instances[name]
    return instance
# parser implementations
@six.add_metaclass(abc.ABCMeta)
class _BaseParser(object):
    """Base class for the parsers.

    All derived classes must overload :meth:`filename`. The reference file is
    parsed once, when the instance is created.

    Parameters
    ----------
    conf : :class:`pyhetdex.tools.configuration.ConfigParser` instance
        configuration object: should contain any info necessary to use the
        instances
    log : :class:`logging.LoggerAdapter` or :class:`logging.Logger` instance
        logger

    Attributes
    ----------
    conf, log : as above
    """

    def __init__(self, conf, log):
        self.conf = conf
        self.log = log
        # content of the reference file: maps the keys built by :meth:`_key`
        # to the (still unparsed) text of the columns from the 4th on
        self._parsed = {}
        self._parse_file()

    def _key(self, id_, channel, amplifier):
        """Return the dictionary key

        Parameters
        ----------
        id_, channel, amplifier : string
            they should match any of the first columns in the file

        Returns
        -------
        string
            dictionary key, built as ``<id_>_<channel>_<amplifier>``
        """
        return "{id_}_{ch}_{amp}".format(id_=id_, ch=channel, amp=amplifier)

    def _key_wildcards(self, id_, channel, amplifier):
        """Return the first key of the list below that exists::

            id_  channel  amplifier
            id_  channel  *
            id_  *        amplifier
            id_  *        *
            *    channel  amplifier
            *    channel  *
            *    *        amplifier
            *    *        *

        If none is matched, returns the first one.

        Parameters
        ----------
        id_, channel, amplifier : string
            they should match any of the first columns in the file

        Returns
        -------
        string
            dictionary key
        """
        # ``product`` varies the last argument fastest, which gives exactly
        # the resolution order listed in the docstring
        for i, c, a in it.product([id_, '*'], [channel, '*'],
                                  [amplifier, '*']):
            key = self._key(i, c, a)
            if key in self._parsed:
                return key
        # nothing matches: hand back the exact key, so that the caller fails
        # on the combination that was actually requested
        return self._key(id_, channel, amplifier)

    def get_value(self, id_, channel, amplifier):
        """Returns the value(s) for the ``id_`` fiber bundle or spectrograph
        (or whatever), for the given ``channel``/``amplifier`` pair.

        Before returning, the values are parsed through :meth:`parse_value`

        This is the only method that should be used.

        Parameters
        ----------
        id_, channel, amplifier : string
            they should match any of the first columns in the file

        Returns
        -------
        whatever :meth:`parse_value` returns

        Raises
        ------
        VHCReferenceKeyError
            if the combination of id, channel and amplifier does not exist in
            the reference file, not even through wild cards
        """
        key = self._key_wildcards(id_, channel, amplifier)
        try:
            value = self._parsed[key]
        except KeyError as e:
            msg = ("The required combination of id: {}, channel: {} and"
                   " amplifier: {} is not present in the reference file '{}'")
            msg = msg.format(id_, channel, amplifier, self.filename())
            six.raise_from(vhcexcept.VHCReferenceKeyError(msg), e)
        return self.parse_value(value)

    @abc.abstractmethod
    def filename(self):  # pragma: no cover
        """Abstract method: name of the reference file. Either hard-coded or
        from the configuration object.

        Returns
        -------
        filename : string
        """
        return ""

    def _parse_file(self):
        """Parse the input file and save it into a dictionary

        The key of the dictionary is returned by :meth:`_key` and the value
        is the text of the columns from the 4th on, stripped of the
        surrounding white spaces. Empty lines and lines whose first non-blank
        character is ``#`` are ignored.

        Raises
        ------
        ValueError
            if a line has fewer than four columns
        """
        fname = self.filename()
        with open(fname) as f:
            for line_no, line in enumerate(f, 1):
                line = line.strip()
                # strip before testing, so that indented comments are skipped
                # too instead of being parsed as data
                if not line or line.startswith("#"):
                    continue
                columns = line.split(None, 3)
                if len(columns) < 4:
                    msg = ("Line {} of the reference file '{}' must have at"
                           " least four columns, found {}: '{}'")
                    raise ValueError(msg.format(line_no, fname,
                                                len(columns), line))
                _id, _ch, _amp, rest = columns
                self._parsed[self._key(_id, _ch, _amp)] = rest.strip()
        self.log.info("'%s' reference file parsed", fname)

    def parse_value(self, str_value):
        """Parse the input string. The return value of :meth:`get_value` is
        passed through this function.

        This implementation just returns the input value

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        string
            parsed string
        """
        return str_value

    def expand_file_name(self, fname):
        """If ``fname`` is not absolute, expand the ``fname`` relative to the
        directory containing the reference file name returned by
        :meth:`filename`

        Parameters
        ----------
        fname : string
            file name to expand

        Returns
        -------
        string
            ``fname`` itself if absolute, otherwise ``fname`` joined to the
            directory of the reference file
        """
        if os.path.isabs(fname):
            return fname
        return os.path.join(os.path.dirname(self.filename()), fname)
class _Overscan(_BaseParser):
    """Parser of the reference file used by the overscan checks."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``overscan_reference`` option of the
        ``[common]`` configuration section.
        """
        fname = self.conf.get("common", "overscan_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Convert the white space separated columns into floats.

        The return value of :meth:`~_BaseParser.get_value` is passed through
        this function.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        list of floats
            one entry per column; the columns are documented as mean, stddev,
            max_ovc_dev_mean and max_ovc_dev_rms of the overscan
        """
        return [float(column) for column in str_value.split()]
class _Bias(_Overscan):
    """Parser of the reference file used by the bias comparison checks.

    The values are parsed as in :class:`_Overscan`; only the file differs.
    """

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``bias_reference`` option of the ``[bias]``
        configuration section.
        """
        fname = self.conf.get("bias", "bias_reference")
        return os.path.expanduser(fname)
class _NFibers(_BaseParser):
    """Parser of the reference file used by the n_fibers checks."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``nfibers_reference`` option of the
        ``[flat]`` configuration section.
        """
        fname = self.conf.get("flat", "nfibers_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Convert the remaining columns of the n_fibers reference file into
        integers.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        list of ints
            one entry per column; the columns are documented as the expected
            number of fibers and the maximum error
        """
        return [int(column) for column in str_value.split()]
class _MinFlux(_BaseParser):
    """Parser of the reference file used by the min_flux checks."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``min_flux_reference`` option of the
        ``[flat]`` configuration section.
        """
        fname = self.conf.get("flat", "min_flux_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Convert the remaining column of the min_flux reference file into
        a float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        min_flux : float
            minimum allowed flux
        """
        min_flux = float(str_value)
        return min_flux
class _Saturation(_BaseParser):
    """Parser of the reference file used by the saturation checks."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``saturation_reference`` option of the
        ``[common]`` configuration section.
        """
        fname = self.conf.get("common", "saturation_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Split the remaining two columns of the saturation reference file.

        Parameters
        ----------
        str_value : string
            string to be parsed; it must contain exactly two white space
            separated columns

        Returns
        -------
        saturation : float
            first column, documented as the maximum allowed flux
        max_n : int
            second column; NOTE(review): its meaning is not visible from this
            module, confirm against the saturation check
        """
        saturation, max_n = str_value.split()
        return (float(saturation), int(max_n))
class _Distortion(_BaseParser):
    """Parser of the file listing the reference distortion files."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``distortion_reference`` option of the
        ``[common]`` configuration section.
        """
        fname = self.conf.get("common", "distortion_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Return the name of the reference distortion file.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        dist_file : string
            name of the distortion file, stripped of the surrounding white
            spaces and passed through :meth:`~_BaseParser.expand_file_name`
        """
        dist_file = str_value.strip()
        return self.expand_file_name(dist_file)
class _Arcs(_BaseParser):
    """Parser of the file listing the reference line list files and the
    maximum deviation of the arc peaks from the expected position.
    """

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``arcs_reference`` option of the ``[arc]``
        configuration section.
        """
        fname = self.conf.get("arc", "arcs_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Split the name of the reference line list file from the pixel
        tolerance.

        Parameters
        ----------
        str_value : string
            string to be parsed; it must contain exactly two white space
            separated columns

        Returns
        -------
        line_list : string
            name of the line list file, passed through
            :meth:`~_BaseParser.expand_file_name`
        peak_tolerance : int
            maximum tolerance for arc peak shift
        """
        name, tolerance = str_value.split()
        full_name = self.expand_file_name(name)
        return full_name, int(tolerance)
class _Dettemp(_BaseParser):
    """Parser of the file with the maximum detector temperature and the
    maximum allowed difference between the commanded and the actual
    temperature.
    """

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``dettemp_reference`` option of the
        ``[common]`` configuration section.
        """
        fname = self.conf.get("common", "dettemp_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Convert the white space separated columns into floats.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        list of floats
            one entry per column; the columns are documented as temperature
            and delta temperature
        """
        return [float(column) for column in str_value.split()]
class _Nullpixel(_BaseParser):
    """Parser of the file with the maximum number of null pixels."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``nullpixel_reference`` option of the
        ``[common]`` configuration section.
        """
        fname = self.conf.get("common", "nullpixel_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Convert the value into one float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        float
            the value stored in the reference file, i.e. the maximum number
            of null pixels according to the class description
        """
        max_null = float(str_value)
        return max_null
class _nHist(_BaseParser):
    """Parser of the reference file used by the row_cte checks."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``row_cte_reference`` option of the
        ``[hetdex_dithers]`` configuration section.
        """
        fname = self.conf.get("hetdex_dithers", "row_cte_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Convert the remaining column of the row_cte reference file into
        a float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        n_hist : float
            histogram selection parameter
        """
        n_hist = float(str_value)
        return n_hist
class _SkyLevel(_BaseParser):
    """Parser of the reference file used by the sky level check."""

    def filename(self):
        """Return the reference file name, with a leading ``~`` expanded.

        The name is read from the ``sky_level_reference`` option of the
        ``[hetdex_dithers]`` configuration section.
        """
        fname = self.conf.get("hetdex_dithers", "sky_level_reference")
        return os.path.expanduser(fname)

    def parse_value(self, str_value):
        """Convert the remaining column of the sky_level reference file into
        a float.

        Parameters
        ----------
        str_value : string
            string to be parsed

        Returns
        -------
        sky_level : float
            counts over the median overscan value
        """
        sky_level = float(str_value)
        return sky_level