# Source code for libvhc.vhc

# Virus Health Check: a validation tool for HETDEX/VIRUS data
# Copyright (C) 2015, 2016, 2017, 2018  "The HETDEX collaboration"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""Entry point for ``vhc``

Main tasks

* load the configuration file
* set up logging and, if needed, multiprocessing system
* load plugin hooks
* set up the html recap
* check the recipe
* get the list of drivers
* execute the drivers
* write the html recap file
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import argparse as ap
import configparser
import glob
import multiprocessing
import os
import warnings

from pyhetdex.tools import processes
from pyhetdex.tools.files.file_tools import FileNameRotator

import libvhc
import libvhc.config as vhcconf
from libvhc.drivers import get_drivers, DRIVER_FILE_COUNTER
from libvhc.exceptions import VHCPathError
import libvhc.html as vhchtml
import libvhc.loaders as vhcload
import libvhc.loggers as vhclog
from libvhc.recipes import get_recipe
import libvhc.reference_file_parser as vhcrfp
import libvhc.utils as vhcutil


def abs_dir(value):
    """Validate that ``value`` is a directory and return its absolute path.

    Meant to be used as the ``type`` of an :mod:`argparse` argument.

    Parameters
    ----------
    value : string
        path to validate

    Returns
    -------
    string
        absolute path to ``value``

    Raises
    ------
    argparse.ArgumentTypeError
        if the absolute path of ``value`` is not an existing directory
    """
    directory = os.path.abspath(value)
    if os.path.isdir(directory):
        return directory
    raise ap.ArgumentTypeError("Not a directory: {}".format(directory))
def parse_cl(argv=None):
    """Build the ``vhc`` command line parser and parse the arguments.

    The parser inherits the options returned by
    ``vhcutil.common_parser_arguments()`` and adds the positional ``path``
    (validated with :func:`abs_dir`), ``subdir`` and ``extra`` arguments, the
    ``-c/--config`` option and a group of options overriding the ``[fplane]``
    configuration section.

    Parameters
    ----------
    argv : list of strings, optional
        command line arguments, except the caller name. If ``None``, it is set
        to ``sys.argv[1:]``

    Returns
    -------
    Namespace
        parsed command line arguments
    """
    description = ("Virus Health Check (VHC): check that Virus files"
                   " contained into the given directory are fine")

    epilog_template = ('The official configuration file can be found in the'
                       ' ``vhc_config`` svn repository. The minimum suggested'
                       ' svn revision is {}. See the documentation'
                       ' (http://www.mpe.mpg.de/~montefra/documentation/vhc/'
                       'latest) for more information.')
    epilog = epilog_template.format(vhcconf.SVN_REVISION)

    parser = ap.ArgumentParser(
        description=description,
        epilog=epilog,
        parents=[vhcutil.common_parser_arguments(), ],
        formatter_class=ap.ArgumentDefaultsHelpFormatter,
    )

    # positional arguments
    parser.add_argument('path', type=abs_dir,
                        help="Path containing the files to explore")
    parser.add_argument(
        'subdir', nargs='?',
        help=('For the drivers shipped with VHC: the files will be collected'
              ' only from the ``%(dest)s`` subdirectory of ``path``. Third'
              ' party drivers might interpret this argument as the first'
              ' part of ``extra``.'))
    parser.add_argument(
        'extra', nargs="*",
        help=('Any extra arguments; ignored by the drivers shipped with'
              ' VHC'))

    # generic options
    parser.add_argument(
        '-c', '--config',
        help=("Configuration file. If provided, overrides the search for it"
              " in the default positions."))

    # options overriding entries of the configuration file
    fplane_group = parser.add_argument_group(
        title='Override options in the [fplane] section')
    fplane_group.add_argument(
        '-f', '--fplane-file',
        dest='setting__fplane__fplane_file',
        metavar='FPLANE_FILE',
        help="Override the fplane file name with the given option")

    return parser.parse_args(args=argv)
def envvar_to_path(env_name='CUREBIN'):
    '''Prepend the directory stored in an environment variable to ``PATH``.

    This avoids having to explicitly provide the directory (by default the one
    pointed to by ``CUREBIN``) to the commands living in it.

    If the variable is not set, ``PATH`` is left untouched and a warning is
    issued.

    Parameters
    ----------
    env_name : str, optional
        name of the environment variable holding the directory to push in
        front of ``PATH``
    '''
    try:
        curebin = os.environ[env_name]
    except KeyError:
        warnings.warn('"{}" environment variable is not set. Commands from'
                      ' that path will fail'.format(env_name))
        return

    current_path = os.environ.get('PATH', '')
    if current_path:
        # os.pathsep instead of a hard-coded ':' keeps this portable
        os.environ['PATH'] = curebin + os.pathsep + current_path
    else:
        # do not leave a trailing separator: an empty PATH entry is
        # interpreted as the current directory on POSIX systems
        os.environ['PATH'] = curebin
def check_is_shot(path, conf):
    '''Uses the configuration option ``shot_subdir`` in the ``[general]``
    section to decide whether to abort vhc or not.

    If the option is present and non-empty, it is used as a glob pattern
    relative to ``path``; an exception is raised if no *directory* matches it.
    If the option is missing or empty no check is performed.

    Parameters
    ----------
    path : string
        path to check
    conf : :class:`pyhetdex.tools.configuration.ConfigParser`
        configuration object

    Raises
    ------
    libvhc.exceptions.VHCPathError
        if the path doesn't have ``shot_subdir`` as subdirectory
    '''
    shot_subdir = conf['general'].get('shot_subdir', '')
    if not shot_subdir:
        return

    pattern = os.path.join(path, shot_subdir)
    # a plain file matching the pattern is not a subdirectory: only accept
    # matches that are directories
    if any(os.path.isdir(match) for match in glob.iglob(pattern)):
        return

    raise VHCPathError('The path "{}" does not have any subdirectory'
                       ' matching the pattern "{}". This likely means'
                       ' that the path passed to vhc it is not a shot'
                       ' directory'.format(path, shot_subdir))
def log_svn_info(log, svn_info):
    '''Log, at ``INFO`` level, every value of the svn information.

    One ``configuration file <value>`` message is emitted per entry; nothing
    is returned.

    Parameters
    ----------
    log : :class:`logging.Logger`
        logger to use
    svn_info : dictionary-like
        information to log; only its values are used
    '''
    template = 'configuration file %s'
    for entry in svn_info.values():
        log.info(template, entry)
def get_worker(name, conf, log):
    '''Create a worker according to the ``[general]`` configuration section
    and return it.

    The ``multiprocessing`` option selects between multi and single processor
    mode. The ``n_processors`` option is used as number of processes only if
    it is strictly positive and strictly smaller than the number of CPUs,
    otherwise the number of CPUs is used.

    Parameters
    ----------
    name : string
        name to associate to the worker
    conf : :class:`pyhetdex.tools.configuration.ConfigParser`
        configuration object
    log : :class:`logging.Logger`
        logger to use

    Returns
    -------
    :class:`~pyhetdex.tools.processes._Worker`
    '''
    multi_mode = conf.getboolean("general", "multiprocessing")
    requested = conf.getint("general", "n_processors")

    available = multiprocessing.cpu_count()
    # out-of-range requests fall back to all the available CPUs
    n_processors = requested if 0 < requested < available else available

    if multi_mode:
        log.info("Running in multiprocessor mode with %d processors",
                 n_processors)
    else:
        log.info("Running in single processor mode")

    return processes.get_worker(name=name, multiprocessing=multi_mode,
                                processes=n_processors)
def run_drivers(path, argv, drivers, available_drivers, vcheck):
    '''Run the given drivers, one after the other.

    Before each driver runs, its name is stored in ``vcheck.check`` and the
    logger associated to ``path`` is updated. A driver that is not available,
    or that raises an exception, is reported as a failed test to the html
    recap and logged as critical; the remaining drivers are still executed.

    Parameters
    ----------
    path : string
        path passed to vhc
    argv : list of strings
        extra arguments passed to the drivers
    drivers : list of strings
        name of the drivers to execute
    available_drivers : dictionary
        map between driver names and callable implementing them
    vcheck : :class:`libvhc.VCheck`
        vcheck object to pass around
    '''
    for driver_name in drivers:
        # save the check name and refresh the logger accordingly
        vcheck.check = driver_name
        vhclog.update_logger(vcheck, name=path)
        log = vhclog.getLogger(name=path)

        try:
            run = available_drivers[driver_name]
        except KeyError:
            failure = "the driver is not found"
            vhchtml.add_fplane_test(vcheck, failure, False)
            log.critical(failure)
        else:
            try:
                run(vcheck, path, argv)
            except Exception as exc:
                # a misbehaving driver must not prevent the others to run
                failure = ("something unexpected happened. Exception: "
                           + str(exc))
                vhchtml.add_fplane_test(vcheck, failure, False)
                log.critical(failure, exc_info=True)
def main(argv=None):
    """Main function. Does the setup, get or create the recipe and driver
    files and run the drivers.

    In order: parse the command line, prepend the cure binaries to ``PATH``,
    load the configuration, check that ``path`` looks like a shot directory,
    create the rotated output file names, set up logging, get the recipe and
    the drivers, initialise the html renderer and the reference file parsers,
    run the drivers through a worker and finally render and optionally copy
    the html recap.

    Parameters
    ----------
    argv : list of strings, optional
        command line arguments, except the caller name. If ``None``, it is set
        to ``sys.argv[1:]``
    """
    # NOTE(review): this function was recovered from a listing that had lost
    # its indentation; where the nested ``with`` blocks end (in particular
    # whether ``remove_worker``, ``render_html`` and ``copy_html`` run inside
    # or outside the html/logger contexts) is a best guess -- confirm against
    # the upstream source.
    args = parse_cl(argv=argv)

    envvar_to_path()

    path = args.path
    # the drivers receive ``subdir`` (when given) followed by the extra
    # arguments
    argv = args.extra
    if args.subdir:
        argv = [args.subdir, ] + argv
    conf_name = 'default'

    # load configuration
    vhcconf.load_config(name=conf_name, conf_file=args.config, args=args)
    conf = vhcconf.get_config(name=conf_name)
    conf_svn_info = vhcconf.conf_svn_info(conf)

    # raises VHCPathError if ``path`` does not look like a shot directory
    check_is_shot(path, conf)

    # create the file names with a proper rotation
    rotator = FileNameRotator(path, touch_files=False,
                              logfile=vhclog.LOG_FILE,
                              result_file=vhclog.RESULT_FILE,
                              json_file=vhclog.JSON_FILE,
                              html_recap=vhchtml.HTML_RECAP,
                              driver_file=DRIVER_FILE_COUNTER)
    # Create the json file only if write_json is set to yes
    json_file = ''
    if conf.getboolean("general", "write_json"):
        json_file = rotator.json_file

    # set up the logging system
    vhclog.set_logger(path, rotator.logfile, rotator.result_file, json_file)
    log = vhclog.getLogger(name=path)

    with vhclog.logger_context(path):
        # Add the configuration file version at the beginning of the log.txt
        # file
        log_svn_info(log, conf_svn_info)

        # load the recipes and get the recipe name
        recipe = get_recipe(path, log)

        # update the logger
        # create the VCheck instance only once, update the check attribute in
        # the loop after reading the driver file
        vcheck = libvhc.VCheck(recipe)
        vhclog.update_logger(vcheck, name=path)
        log = vhclog.getLogger(name=path)

        # load the drivers and get the list of drivers to run
        drivers, available_drivers = get_drivers(path, recipe, conf, log,
                                                 out_driver_file=rotator.driver_file)

        # setup html render
        vhchtml.init_renderer(recipe, conf.get("fplane", "fplane_file"))
        with vhchtml.html_context(recipe):
            log.info("html recap renderer initialised")
            # pass to the html render the information about the svn
            # configuration info
            vhchtml.get_queue(recipe).put(['config_info', [conf_svn_info, ]])

            # Load and initialise the reference file parsers
            vhcload.load_reference_file_parsers(log)
            vhcrfp.init(conf, log)

            # setup multiprocessing
            worker = get_worker(path, conf, log)

            with worker:
                run_drivers(path, argv, drivers, available_drivers,
                            vcheck=vcheck)
                # block until the jobs handed to the worker are done
                # (presumably; semantics defined by pyhetdex -- TODO confirm)
                worker.wait()
            processes.remove_worker(name=path)

        # write html
        vhchtml.render_html(recipe, rotator, path,
                            xmin=conf.getfloat("html", "xmin"),
                            ymin=conf.getfloat("html", "ymin"),
                            xscale=conf.getfloat("html", "xscale"),
                            yscale=conf.getfloat("html", "yscale"),
                            open_html=conf.getboolean("html", "open_html"))

        # optionally write a second copy of the recap, as configured in the
        # ``[html]`` section
        copy_html(recipe, conf)
def copy_html(recipe, conf):
    '''Check the configuration options for copying the html file and make the
    copy if necessary.

    The copy is written to the ``copy_html_name`` option of the ``[html]``
    section, if the option exists, is not empty and the directory that should
    contain the file exists. A bare file name (no directory part) is
    interpreted relative to the current working directory. The
    ``open_copy_html`` option, ``False`` if missing, is passed as
    ``open_html`` to the writer.

    Parameters
    ----------
    recipe : string
        name of the recipe
    conf : :class:`pyhetdex.tools.configuration.ConfigParser` instance
        configuration object: should contain any info necessary to use the
        instances
    '''
    try:
        copy_html_name = conf.get('html', 'copy_html_name')
    except configparser.NoOptionError:
        copy_html_name = None

    try:
        open_copy_html = conf.getboolean('html', 'open_copy_html')
    except configparser.NoOptionError:
        open_copy_html = False

    if not copy_html_name:
        # option missing or empty: no copy requested
        return

    # ``dirname`` is empty for a bare file name and ``isdir('')`` is always
    # False: fall back to the current directory so the copy is not silently
    # skipped
    parent_dir = os.path.dirname(copy_html_name) or os.curdir
    if os.path.isdir(parent_dir):
        vhchtml.write_html(recipe, copy_html_name, open_html=open_copy_html)