Source code for overwatch.processing.processingClasses

#!/usr/bin/env python

""" Classes that define the processing structure of Overwatch.

Classes that define the structure of processing. This information can be created and processed,
or read from file.

Note:
    For the ``__repr__`` and ``__str__`` methods defined here, they can throw ``KeyError`` for class attributes
    if these methods rely on ``__dict__`` and the objects have just been loaded from ZODB. Presumably, ``__dict__``
    doesn't cause ZODB to fully load the object. To work around this issue, any methods using ``__dict__`` first
    call some attribute (ideally, something simple) to ensure that the object is fully loaded. The result of that call
    is ignored.

.. codeauthor:: Raymond Ehlers <raymond.ehlers@cern.ch>, Yale University
"""

from __future__ import print_function
from __future__ import absolute_import
from future.utils import iteritems
from future.utils import itervalues

# Database
import BTrees.OOBTree
import persistent

import os
import pendulum
import logging
# Setup logger
logger = logging.getLogger(__name__)

from ..base import utilities
from ..base import config
#from config.processingParams import processingParameters
# Read the processing configuration once, at import time. The call returns a pair which is unpacked
# into ``processingParameters`` (used throughout this module, e.g. for the ``dirPrefix`` entry) and
# ``filesRead`` (presumably the configuration files that were loaded - TODO confirm against
# ``config.readConfig``).
(processingParameters, filesRead) = config.readConfig(config.configurationType.processing)

class runContainer(persistent.Persistent):
    """ Object to represent a particular run.

    It stores run level information, as well as the subsystems, which in turn contain the corresponding
    event information (histogram groups, histograms, etc).

    Note that files are *not* considered event level information because the files correspond to
    individual subsystems. Furthermore, in rare cases, there may be different numbers of files for
    different subsystems that are included in an individual run. Consequently, it is cleaner for each
    subsystem to track its own files.

    To allow the object to be reconstructed from scratch, the HLT mode is stored by writing a YAML file
    in the corresponding run directory. This file is referred to as the "run info" file. Additional
    properties could also be written to this file to avoid the loss of transient information.

    Note:
        The run info file is read and written on object construction. It will only be read if the
        HLT mode is not passed.

    Args:
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``.
        fileMode (bool): If true, the run data was collected in cumulative mode. See the
            :doc:`processing README </processingReadme>` for further information.
        hltMode (str): String containing the HLT mode used for the run. Default: ``None``, in which
            case the mode is retrieved from the stored run info.

    Attributes:
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``
        runNumber (int): Run number extracted from the ``runDir``.
        prettyName (str): Reformatting of the ``runDir`` for improved readability.
        mode (bool): If true, the run data was collected in cumulative mode. See the
            :doc:`processing README </processingReadme>` for further information. Set via the
            ``fileMode`` constructor argument.
        subsystems (BTree): Dict-like object which will contain all of the subsystem containers in
            an event. The key is the corresponding subsystem three letter name.
        hltMode (str): Mode the HLT operated in for this run. Valid HLT modes are "B", "C", "E", and "U".
            Further information on the various modes is in the :doc:`processing README </processingReadme>`.
            Default: ``None`` (which will be converted to "U", for "unknown").
    """
    def __init__(self, runDir, fileMode, hltMode = None):
        self.runDir = runDir
        self.runNumber = int(runDir.replace("Run", ""))
        self.prettyName = "Run {runNumber}".format(runNumber = self.runNumber)
        # NOTE: The ``fileMode`` argument is stored under the attribute name ``mode``.
        self.mode = fileMode
        self.subsystems = BTrees.OOBTree.BTree()
        self.hltMode = hltMode

        # Try to retrieve the HLT mode if it was not passed
        runDirectory = os.path.join(processingParameters["dirPrefix"], self.runDir)
        if not hltMode:
            self.hltMode = utilities.retrieveHLTModeFromStoredRunInfo(runDirectory = runDirectory)

        # Write run information. We pass the resolved mode (``self.hltMode``) rather than the raw
        # argument so that a mode which was just retrieved from the run info is never replaced by ``None``.
        utilities.writeRunInfoToFile(runDirectory = runDirectory, hltMode = self.hltMode)

    def __repr__(self):
        """ Representation of the object. """
        # Dummy call. See note at the top of the module.
        self.runDir
        return "{}(runDir = {runDir}, fileMode = {mode}, hltMode = {hltMode})".format(self.__class__.__name__, **self.__dict__)

    def __str__(self):
        """ Print many of the elements of the object. """
        return "{}: runDir: {runDir}, runNumber: {runNumber}, prettyName: {prettyName}, fileMode: {mode}," \
               " subsystems: {subsystems}, hltMode: {hltMode}".format(self.__class__.__name__,
                                                                      runDir = self.runDir,
                                                                      runNumber = self.runNumber,
                                                                      prettyName = self.prettyName,
                                                                      mode = self.mode,
                                                                      subsystems = list(self.subsystems.keys()),
                                                                      hltMode = self.hltMode)

    def isRunOngoing(self):
        """ Checks if a run is ongoing.

        The ongoing run check is performed by checking for a new file in any of the subsystems.
        If they have just received a new file, then the run is ongoing. Otherwise, the time since the
        most recent file timestamp is compared against a five minute buffer.

        Note:
            If ``subsystem.newFile`` is false, this is not a sufficient condition to say that the run has
            ended. This is because ``newFile`` will be set to false if the subsystem didn't have a file in
            the most recent processing run, even if the run is still ongoing. This can happen for many
            reasons, including if the processing is executed more frequently than the data transfer rate
            or receiver request rate, for example. However, if ``newFile`` is true, then it is sufficient
            to know that the run is ongoing.

        Args:
            None
        Returns:
            bool: True if the run is ongoing.
        """
        try:
            # If any subsystem has a new file, nothing else needs to be checked.
            if any(subsystem.newFile is True for subsystem in itervalues(self.subsystems)):
                return True

            # We haven't found a new file, so we check the time stamps.
            logger.debug("Checking timestamps for whether the run in ongoing.")
            minutesSinceLastTimestamp = self.minutesSinceLastTimestamp()
            logger.debug("{minutesSinceLastTimestamp} minutes since the last timestamp.".format(minutesSinceLastTimestamp = minutesSinceLastTimestamp))
            # Compare the unix timestamps with a five minute buffer period.
            # This buffer time is arbitrarily selected, but the value is motivated by a balance to ensure
            # that a missed file doesn't cause the run to appear over, while also not claiming that the
            # run continues much longer than it actually does.
            # The lower bound excludes the ``-1`` sentinel, which means that the time could not be
            # determined. In that case, we have no evidence that the run is ongoing.
            return 0 <= minutesSinceLastTimestamp < 5
        except KeyError:
            return False

    def minutesSinceLastTimestamp(self):
        """ Determine the time since the last file timestamp in minutes.

        Subsystems which don't have any files are skipped.

        Args:
            None.
        Returns:
            float: Minutes since the timestamp of the most recent file. Default: -1, which is returned
                if no file timestamp could be found.
        """
        timeSinceLastTimestamp = -1
        try:
            mostRecentTimestamp = -1
            for subsystem in itervalues(self.subsystems):
                # Indexing the keys of an empty container raises an ``IndexError``, so skip such subsystems.
                if not subsystem.files:
                    continue
                newestFile = subsystem.files[subsystem.files.keys()[-1]]
                if newestFile.fileTime > mostRecentTimestamp:
                    mostRecentTimestamp = newestFile.fileTime

            # Without a valid timestamp, there is nothing to compare against, so return the default
            # rather than the time elapsed since the (meaningless) sentinel value.
            if mostRecentTimestamp < 0:
                return timeSinceLastTimestamp

            # The timestamps of the files are set in Geneva, so we need to construct the timestamp in Geneva
            # to compare against. The proper timezone for this is "Europe/Zurich".
            geneva = pendulum.from_timestamp(mostRecentTimestamp, tz = "Europe/Zurich")
            now = pendulum.now()
            # Return in minutes
            timeSinceLastTimestamp = now.diff(geneva).in_minutes()
        except (KeyError, IndexError):
            # If a lookup fails somewhere, we just ignore it and pass back the default value.
            pass

        return timeSinceLastTimestamp

    def startOfRunTimeStamp(self):
        """ Provides the start of the run time stamp in a format suitable for display.

        This timestamp is determined by looking at the start of run of the last subsystem
        (arbitrarily selected) that is available in the run. The formatting is delegated to the
        ``prettyPrintUnixTime()`` method of that subsystem.

        Args:
            None
        Returns:
            str: Start of run time stamp formatted in an appropriate manner for display, or ``False``
                if the run doesn't contain any subsystems.
        """
        try:
            # We just take the last subsystem in a given run. Any will do
            lastSubsystem = self.subsystems[self.subsystems.keys()[-1]]
            return lastSubsystem.prettyPrintUnixTime(lastSubsystem.startOfRun)
        except (KeyError, IndexError):
            # ``IndexError`` is raised when there are no subsystems available.
            return False
class subsystemContainer(persistent.Persistent):
    """ Object to represent a particular subsystem (detector).

    It stores subsystem level information, including the histograms, groups, and file information.
    It is the main container for much of the information that is relevant for processing.

    Information on the file storage layout implemented through this class is available in the
    :doc:`processing README </processingReadme>`.

    Note:
        This object checks for and creates a number of directories on initialization.

    Args:
        subsystem (str): The current subsystem in the form of a three letter, all capital name (ex. ``EMC``).
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``
        startOfRun (int): Start of the run in unix time.
        endOfRun (int): End of the run in unix time.
        showRootFiles (bool): True if the ROOT files should be made accessible through the run list.
            Default: ``False``.
        fileLocationSubsystem (str): Subsystem name of where the files are actually located. If a subsystem
            has specific data files then this is just equal to the `subsystem`. However, if it relies on
            files inside of another subsystem (such as those from the HLT subsystem receiver), then this
            variable is equal to that subsystem name. Default: ``None``, in which case it is detected
            automatically: the subsystem itself if its directory exists, and ``HLT`` otherwise.

    Attributes:
        subsystem (str): The current subsystem in the form of a three letter, all capital name (ex. ``EMC``).
        showRootFiles (bool): True if the ROOT files should be made accessible through the run list.
        fileLocationSubsystem (str): Subsystem name of where the files are actually located. If a subsystem
            has specific data files then this is just equal to the `subsystem`. However, if it relies on
            files inside of another subsystem, then this variable is equal to that subsystem name.
        files (BTree): Dict-like object which describes subsystem ROOT files. Unix time of a given file
            is the key and a file container for that file is the value.
        timeSlices (PersistentMapping): Dict-like object which describes subsystem time slices. A UUID is
            the dict key (so they can be uniquely identified), while a timeSliceContainer with the
            corresponding time slice properties is the value.
        combinedFile (fileContainer): File container corresponding to the combined file.
        baseDir (str): Path to the base storage directory for the subsystem. Of the form ``Run123456/SYS``.
        imgDir (str): Path to the image storage directory for the subsystem. Of the form ``Run123456/SYS/img``.
        jsonDir (str): Path to the json storage directory for the subsystem. Of the form ``Run123456/SYS/json``.
        startOfRun (int): Start of the run in unix time.
        endOfRun (int): End of the run in unix time.
        runLength (int): Length of the run in minutes.
        histGroups (PersistentList): List-like object of histogram groups, which are used to classify
            similar histograms.
        histsInFile (BTree): Dict-like object of all histograms that are in a particular file. Keys are the
            histogram name, while the values are ``histogramContainer`` objects which contain the histogram.
            Hists should usually be accessed through the hist groups, but this provides direct access
            when necessary early in processing.
        histsAvailable (BTree): Dict-like object containing all histograms that are available, including
            those in a particular file and those that are created during processing. Newly created hists
            should be stored in this dict. Keys are histogram names, while values are ``histogramContainer``
            objects which contain the histogram.
        hists (BTree): Dict-like object which contains all histograms that should be processed.
            After initial creation, this should be the definitive source of histograms for processing
            and display. Keys are histogram names, while values are ``histogramContainer`` objects which
            contain the histogram.
        newFile (bool): True if we received a new file, which will trigger reprocessing. This flag should
            only be changed when beginning processing the next time.
            To be explicit, if a subsystem just received a new file and it was processed, this flag should
            only be changed to ``False`` after the next processing iteration begins. This allows the status
            of the run (determined through the subsystem) to be displayed in the web app. Default: True
            because if the subsystem is being created, we likely need reprocessing.
        nEvents (int): Number of events in the subsystem. Processing will look for a histogram that
            contains ``events`` in the name and attempt to extract the number of events based on the number
            of entries. Should not be used unless the subsystem explicitly includes a histogram with the
            number of events. Default: 1.
        processingOptions (PersistentMapping): Implemented by the subsystem to note options used during
            standard processing. The subsystem processing options can vary when processing a time slice,
            so storing the options allow us to return to the standard options when performing a full
            processing. Keys are the option names as string, while values are their corresponding values.
    """
    def __init__(self, subsystem, runDir, startOfRun, endOfRun, showRootFiles = False, fileLocationSubsystem = None):
        self.subsystem = subsystem
        self.showRootFiles = showRootFiles

        # If data does not exist for this subsystem then it is dependent on HLT data
        # Detect it automatically if not passed to the initialization.
        if fileLocationSubsystem is None:
            # Use the subsystem directory as proxy for whether it exists.
            # NOTE: This detection works, but it isn't so flexible.
            if os.path.exists(os.path.join(processingParameters["dirPrefix"], runDir, subsystem)):
                self.fileLocationSubsystem = self.subsystem
            else:
                self.fileLocationSubsystem = "HLT"
        else:
            self.fileLocationSubsystem = fileLocationSubsystem

        if self.showRootFiles is True and self.subsystem != self.fileLocationSubsystem:
            logger.info("It is requested to show ROOT files for subsystem {subsystem}, but the subsystem does not have specific data files. Using HLT data files!".format(subsystem = subsystem))

        # Files
        # Be certain to set these after the subsystem has been created!
        # Contains all files for that particular run
        self.files = BTrees.OOBTree.BTree()
        self.timeSlices = persistent.mapping.PersistentMapping()
        # Only one combined file, so we do not need a dict!
        self.combinedFile = None

        # Directories
        self.setupDirectories(runDir)

        # Times
        self.startOfRun = startOfRun
        self.endOfRun = endOfRun
        # The run length is in minutes
        self.runLength = self.calculateRunLength()

        # Histograms
        self.histGroups = persistent.list.PersistentList()
        # Should be accessed through the group usually, but this provides direct access
        self.histsInFile = BTrees.OOBTree.BTree()
        # All hists, including those which were created, along with those in the file
        self.histsAvailable = BTrees.OOBTree.BTree()
        # Hists list that should be used
        self.hists = BTrees.OOBTree.BTree()

        # True if we received a new file, therefore leading to reprocessing
        # If the subsystem is being created, we likely need reprocessing, so defaults to true
        self.newFile = True

        # Number of events in the subsystem. The processing will attempt to determine the number of events,
        # but it is a subsystem dependent quantity. It needs explicit support.
        self.nEvents = 1

        # Processing options
        self.processingOptions = persistent.mapping.PersistentMapping()

    def calculateRunLength(self, startOfRun = None, endOfRun = None):
        """ Helper function to calculate the run length.

        Note:
            The run length is defined in minutes. The result is only returned, so it is up to the
            caller to store it.

        Args:
            startOfRun (int): Start of the run in unix time. Default: ``None``. If not specified, the
                ``startOfRun`` stored in the subsystem will be used.
            endOfRun (int): End of the run in unix time. Default: ``None``. If not specified, the
                ``endOfRun`` stored in the subsystem will be used.
        Returns:
            int: The calculated run length in minutes.
        """
        if startOfRun is None:
            startOfRun = self.startOfRun
        if endOfRun is None:
            endOfRun = self.endOfRun
        # The times are in seconds, while the run length is in (whole) minutes
        return (endOfRun - startOfRun) // 60

    def setupDirectories(self, runDir):
        """ Helper function to setup the subsystem directories.

        Defines the base, img, and JSON directories, as well as creating them if necessary.

        Args:
            runDir (str): String containing the run number. For an example run 123456, it should be
                formatted as ``Run123456``
        Returns:
            None. However, it sets the ``baseDir``, ``imgDir``, and ``jsonDir`` properties of the
                ``subsystemContainer``.
        Raises:
            OSError: If a directory could not be created and it does not already exist.
        """
        # Depends on whether the subsystem actually contains the files!
        self.baseDir = os.path.join(runDir, self.fileLocationSubsystem)
        self.imgDir = os.path.join(self.baseDir, "img")
        self.jsonDir = os.path.join(self.baseDir, "json")

        # Ensure that they exist
        for directory in (self.imgDir, self.jsonDir):
            fullPath = os.path.join(processingParameters["dirPrefix"], directory)
            # Create first and inspect afterwards. Checking for existence before creating would leave
            # a window in which another process could create the directory, causing ``makedirs`` to fail.
            try:
                os.makedirs(fullPath)
            except OSError:
                # An already existing directory is fine. Anything else is a real error.
                if not os.path.isdir(fullPath):
                    raise

    def __repr__(self):
        """ Representation of the object. """
        return "{}(subsystem = {subsystem}, runDir = {runDir}, startOfRun = {startOfRun}," \
               " endOfRun = {endOfRun}, showRootFiles = {showRootFiles}," \
               " fileLocationSubsystem = {fileLocationSubsystem})".format(self.__class__.__name__,
                                                                          subsystem = self.subsystem,
                                                                          # The run dir isn't stored directly, but it is the parent of the base dir
                                                                          runDir = os.path.dirname(self.baseDir),
                                                                          startOfRun = self.startOfRun,
                                                                          endOfRun = self.endOfRun,
                                                                          showRootFiles = self.showRootFiles,
                                                                          fileLocationSubsystem = self.fileLocationSubsystem)

    def __str__(self):
        """ Print many of the elements of the object. """
        return "{}: subsystem: {subsystem}, fileLocationSubsystem: {fileLocationSubsystem}," \
               " showRootFiles: {showRootFiles}, startOfRun: {startOfRun}, endOfRun: {endOfRun}," \
               " newFile: {newFile}, hists: {hists}".format(self.__class__.__name__,
                                                            subsystem = self.subsystem,
                                                            fileLocationSubsystem = self.fileLocationSubsystem,
                                                            showRootFiles = self.showRootFiles,
                                                            startOfRun = self.startOfRun,
                                                            endOfRun = self.endOfRun,
                                                            newFile = self.newFile,
                                                            hists = list(self.hists.keys()))

    @staticmethod
    def prettyPrintUnixTime(unixTime):
        """ Converts the given time stamp into an appropriate manner ("pretty") for display.

        The time is returned in the format: "Tuesday, 6 Nov 2018 20:55:10". This function is mainly
        needed in Jinja templates where arbitrary functions are not allowed.

        Note:
            We display this in the CERN time zone ("Europe/Zurich"), so we convert it here to that timezone.

        Args:
            unixTime (int): Unix time to be converted.
        Returns:
            str: The time stamp converted into an appropriate manner for display.
        """
        d = pendulum.from_timestamp(unixTime, tz = "Europe/Zurich")
        return d.format("dddd, D MMM YYYY HH:mm:ss")

    def resetContainer(self):
        """ Clear the stored hist information so we can recreate (reprocess) the subsystem.

        Without resetting the container, reprocessing doesn't fully test the processing functions,
        which are skipped if these list- and dict-like hist objects have entries.

        Args:
            None
        Returns:
            None
        """
        # The containers are emptied in place (rather than replaced) so the same objects are kept.
        del self.histGroups[:]
        self.histsInFile.clear()
        self.histsAvailable.clear()
        self.hists.clear()
class timeSliceContainer(persistent.Persistent):
    """ Time slice information container.

    Contains information about a time slice request, including the time ranges and the files involved.
    These values are required to uniquely describe a time slice.

    Args:
        minUnixTimeRequested (int): Minimum requested unix time. This is the first time stamp to be
            included in the time slice.
        maxUnixTimeRequested (int): Maximum requested unix time. This is the last time stamp to be
            included in the time slice.
        minUnixTimeAvailable (int): Minimum unix time of the run.
        maxUnixTimeAvailable (int): Maximum unix time of the run.
        startOfRun (int): Unix time of the start of the run.
        filesToMerge (list): List of fileContainer objects which need to be merged to create the time slice.
        optionsHash (str): SHA1 hash of the processing options used to construct the time slice.

    Attributes:
        minUnixTimeRequested (int): Minimum requested unix time. This is the first time stamp to be
            included in the time slice.
        maxUnixTimeRequested (int): Maximum requested unix time. This is the last time stamp to be
            included in the time slice.
        minUnixTimeAvailable (int): Minimum unix time of the run.
        maxUnixTimeAvailable (int): Maximum unix time of the run.
        startOfRun (int): Unix time of the start of the run.
        filesToMerge (list): List of fileContainer objects which need to be merged to create the time slice.
        optionsHash (str): SHA1 hash of the processing options used to construct the time slice. This hash
            is used for caching by comparing the processing options for a new time slice request with those
            already processed. If the hashes are the same, we can directly return the already processed result.
        filenamePrefix (str): Filename prefix for the timeSlice file. It is built from the available
            min and max unix times, as well as the options hash.
        filename (fileContainer): File container for the timeSlice file.
        processingOptions (PersistentMapping): Implemented by the time slice container to note options used
            during standard processing.
            The time slice processing options can vary when compared to standard subsystem processing,
            so storing the options allow us to apply the custom time slice options.
    """
    def __init__(self, minUnixTimeRequested, maxUnixTimeRequested, minUnixTimeAvailable, maxUnixTimeAvailable, startOfRun, filesToMerge, optionsHash):
        # Requested times
        self.minUnixTimeRequested = minUnixTimeRequested
        self.maxUnixTimeRequested = maxUnixTimeRequested
        # Available times
        self.minUnixTimeAvailable = minUnixTimeAvailable
        self.maxUnixTimeAvailable = maxUnixTimeAvailable
        # Start of run is also in unix time
        self.startOfRun = startOfRun
        self.optionsHash = optionsHash

        # File containers of the files to merge
        self.filesToMerge = filesToMerge

        # Filename prefix for saving out files
        self.filenamePrefix = "timeSlice.{}.{}.{}".format(self.minUnixTimeAvailable, self.maxUnixTimeAvailable, self.optionsHash)

        # Create filename
        self.filename = fileContainer(self.filenamePrefix + ".root")

        # Processing options
        # Implemented by the detector to note how it was processed that may be changed during time slice processing
        # This allows us return full processing when appropriate
        # Same as the type of options implemented in the subsystemContainer!
        self.processingOptions = persistent.mapping.PersistentMapping()

    def __repr__(self):
        """ Representation of the object. """
        # Dummy call. See note at the top of the module.
        self.minUnixTimeRequested
        # NOTE: The closing parenthesis is part of the format string so the call-like form is balanced.
        return "{}(minUnixTimeRequested = {minUnixTimeRequested}, maxUnixTimeRequested = {maxUnixTimeRequested}," \
               " minUnixTimeAvailable = {minUnixTimeAvailable}, maxUnixTimeAvailable = {maxUnixTimeAvailable}," \
               " startOfRun = {startOfRun}, filesToMerge = {filesToMerge}," \
               " optionsHash = {optionsHash})".format(self.__class__.__name__, **self.__dict__)

    def __str__(self):
        """ Print many of the elements of the object. """
        return "{}: minUnixTimeRequested = {minUnixTimeRequested}, maxUnixTimeRequested = {maxUnixTimeRequested}," \
               " minUnixTimeAvailable = {minUnixTimeAvailable}, maxUnixTimeAvailable = {maxUnixTimeAvailable}," \
               " filenamePrefix: {filenamePrefix}, startOfRun = {startOfRun}, filesToMerge = {filesToMerge}," \
               " optionsHash = {optionsHash}".format(self.__class__.__name__,
                                                     minUnixTimeRequested = self.minUnixTimeRequested,
                                                     maxUnixTimeRequested = self.maxUnixTimeRequested,
                                                     minUnixTimeAvailable = self.minUnixTimeAvailable,
                                                     maxUnixTimeAvailable = self.maxUnixTimeAvailable,
                                                     filenamePrefix = self.filenamePrefix,
                                                     startOfRun = self.startOfRun,
                                                     filesToMerge = self.filesToMerge,
                                                     optionsHash = self.optionsHash)

    def timeInMinutes(self, inputTime):
        """ Return the time from the input unix time to the start of the run in minutes.

        Args:
            inputTime (int): Unix time to be compared to the start of run time.
        Returns:
            int: Minutes from the start of run to the given time.
        """
        #logger.debug("inputTime: {inputTime}, startOfRun: {startOfRun}".format(inputTime = inputTime, startOfRun = self.startOfRun))
        return (inputTime - self.startOfRun) // 60

    def timeInMinutesRounded(self, inputTime):
        """ Return the time from the input unix time to start of the run in minutes, rounded to the nearest minute.

        Note:
            I believe this was created due to some float vs int issues in the Jinja templating system.
            Although the purpose of this function isn't entirely clear, it is kept for compatibility purposes.

        Args:
            inputTime (int): Unix time to be compared to the start of run time.
        Returns:
            int: Minutes from the start of run to the given time.
        """
        return round(self.timeInMinutes(inputTime))
class fileContainer(persistent.Persistent):
    """ File information container.

    This object wraps a ROOT filename, providing convenient access to relevant properties, such as the
    type of file (combined, timeSlice, standard), and the time stamp. This information is often stored
    in the filename itself, but extraction procedures vary for each file type.

    Note that it *does not* open the file itself - this is still the responsibility of the user.

    Args:
        filename (str): Filename of the corresponding file. This is expected to be the full path from
            the ``dirPrefix`` to the file.
        startOfRun (int): Start of the run in unix time. Default: ``None``. The default will lead to
            timeIntoRun being set to ``-1``. The default is most commonly used for time slices, where
            the start of run isn't so meaningful.

    Attributes:
        filename (str): Filename of the corresponding file. This is expected to be the full path from
            the ``dirPrefix`` to the file.
        combinedFile (bool): True if this file corresponds to a combined file. It is set to ``True`` if
            "combined" is in the filename.
        timeSlice (bool): True if this file corresponds to a time slice. It is set to ``True`` if
            "timeSlice" is in the filename (and "combined" is not).
        fileTime (int): Unix time stamp of the file, extracted from the filename.
        timeIntoRun (int): Time in seconds from the start of the run to the file time. Depends on
            startOfRun being a valid time when the object was created. Otherwise, it is ``-1``.
    """
    def __init__(self, filename, startOfRun = None):
        self.filename = filename

        # Determine types of file
        self.combinedFile = False
        self.timeSlice = False
        if "combined" in self.filename:
            self.combinedFile = True
        elif "timeSlice" in self.filename:
            self.timeSlice = True

        # The combined file time will be the length of the run
        # The time slice will be the length of the time slice
        self.fileTime = utilities.extractTimeStampFromFilename(self.filename)
        if startOfRun:
            self.timeIntoRun = self.fileTime - startOfRun
        else:
            # Show a clearly invalid time, since timeIntoRun doesn't make much sense for a time slice
            self.timeIntoRun = -1

    def __repr__(self):
        """ Representation of the object. """
        # The start of run isn't stored, so it is reconstructed from the file time. If ``timeIntoRun``
        # holds the invalid marker (``-1``), then no start of run was provided, so we report ``None``
        # instead of reconstructing a meaningless value.
        if self.timeIntoRun == -1:
            startOfRun = None
        else:
            startOfRun = self.fileTime - self.timeIntoRun
        return "{}(filename = {filename}, startOfRun = {startOfRun})".format(self.__class__.__name__,
                                                                             filename = self.filename,
                                                                             startOfRun = startOfRun)

    def __str__(self):
        """ Print the elements of the object. """
        # Dummy call. See note at the top of the module.
        self.filename
        return "{}: filename = {filename}, combinedFile: {combinedFile}, timeSlice: {timeSlice}," \
               " fileTime: {fileTime}, timeIntoRun: {timeIntoRun}".format(self.__class__.__name__, **self.__dict__)
class histogramGroupContainer(persistent.Persistent):
    """ Organizes similar histograms into groups for processing and display.

    Histogram groups are created by providing name substrings of histograms which should be included.
    The name substring is referred to as a ``groupSelectionPattern``. For example, if the pattern was
    "hello", all histograms containing "hello" would be selected. Additional properties related to
    groups, such as display information, are also stored.

    Args:
        prettyName (str): Readable name of the group.
        groupSelectionPattern (str): Pattern of the histogram names that will be selected. For example, if
            wanted to select histograms related to EMCal patch amplitude, we would make the pattern something
            like "PatchAmp". The pattern depends on the name of the histograms sent from the HLT.
        plotInGridSelectionPattern (str): Pattern which denotes whether the histograms should be plotted in
            a grid. ``plotInGrid`` is set based on whether this value is in ``groupSelectionPattern``. For
            example, in the EMCal, the ``plotInGridSelectionPattern`` is ``_SM``, since "SM" denotes a
            supermodule. Default: "DO NOT PLOT IN GRID".

    Attributes:
        prettyName (str): Readable name of the group.
        selectionPattern (str): Pattern of the histogram names that will be selected. Set via
            ``groupSelectionPattern`` in the constructor.
        plotInGridSelectionPattern (str): Pattern (substring) which denotes whether the histograms should be
            plotted in a grid.
        plotInGrid (bool): True when the histograms should be plotted in a grid.
        histList (PersistentList): List of histogram names that should be filled when the ``selectionPattern``
            is matched.
    """
    def __init__(self, prettyName, groupSelectionPattern, plotInGridSelectionPattern = "DO NOT PLOT IN GRID"):
        self.prettyName = prettyName
        self.selectionPattern = groupSelectionPattern
        self.plotInGridSelectionPattern = plotInGridSelectionPattern
        self.histList = persistent.list.PersistentList()

        # Determined once here so that it is not necessary to check the pattern every time
        self.plotInGrid = self.plotInGridSelectionPattern in self.selectionPattern

    def __repr__(self):
        """ Representation of the object. """
        # The attributes are passed explicitly because the constructor argument ``groupSelectionPattern``
        # is stored as ``selectionPattern``, so it can't be looked up by that name in ``__dict__``.
        return "{}(prettyName = {prettyName}, groupSelectionPattern = {groupSelectionPattern}," \
               " plotInGridSelectionPattern = {plotInGridSelectionPattern})".format(self.__class__.__name__,
                                                                                    prettyName = self.prettyName,
                                                                                    groupSelectionPattern = self.selectionPattern,
                                                                                    plotInGridSelectionPattern = self.plotInGridSelectionPattern)

    def __str__(self):
        """ Print the elements of the object. """
        # See the comment in ``__repr__`` for why the attributes are passed explicitly.
        return "{}: prettyName = {prettyName}, groupSelectionPattern = {groupSelectionPattern}," \
               " plotInGridSelectionPattern = {plotInGridSelectionPattern}, histList: {histList}," \
               " plotInGrid: {plotInGrid}".format(self.__class__.__name__,
                                                  prettyName = self.prettyName,
                                                  groupSelectionPattern = self.selectionPattern,
                                                  plotInGridSelectionPattern = self.plotInGridSelectionPattern,
                                                  histList = self.histList,
                                                  plotInGrid = self.plotInGrid)
class histogramContainer(persistent.Persistent):
    """ Histogram information container.

    Organizes information about a particular histogram (or set of histograms). Manages functions that
    process and otherwise modify the histogram, which are specified through the plugin system. The
    container also manages plotting details.

    Note:
        The histogram container doesn't always have access to the underlying histogram. When constructing
        the container, it is useful to have the histogram available to provide some information, but then
        the histogram should not be needed until final processing is performed and the hist is plotted.
        When this final step is reached, the histogram can be retrieved by ``retrieveHistogram()``
        helper function.

    Args:
        histName (str): Name of the histogram. Doesn't necessarily need to be the same as ``TH1.GetName()``.
        histList (list): List of histogram names that should contribute to this container. Used for
            stacking multiple histograms onto one canvas. Default: None
        prettyName (str): Name of the histogram that is appropriate for display. Default: ``None``, which
            will lead to it being set to ``histName``.

    Attributes:
        histName (str): Name of the histogram. Doesn't necessarily need to be the same as ``TH1.GetName()``.
        prettyName (str): Name of the histogram that is appropriate for display.
        histList (list): List of histogram names that should contribute to this container. Used for
            stacking multiple histograms onto one canvas. Default: None. See ``retrieveHistogram()``
            for more information on how this functionality is utilized.
        information (PersistentMapping): Information that is extracted from the histogram that should be
            stored persistently and displayed. This information will be displayed with the web app, with
            the key shown as a clickable button, and the value information stored behind it.
        hist (ROOT.TH1): The histogram which this container wraps.
        histType (ROOT.TClass): Class of the histogram. For example, ``ROOT.TH1F``.
            Can be used for functions that only apply to 2D hists, etc. It is stored separately from the
            histogram to allow for it to be available even when the underlying histogram is not (as occurs
            while setting up but not yet processing a histogram).
        drawOptions (str): Draw options to be passed to ``TH1.Draw()`` when drawing the histogram.
        canvas (ROOT.TCanvas): Canvas onto which the histogram will be plotted. Available after the
            histogram has been classified (ie in processing functions).
        projectionFunctionsToApply (PersistentList): List-like object of functions that perform projections
            to the histogram that is represented by this container. See the
            :doc:`detector subsystem README </detectorPluginsReadme>` for more information.
        functionsToApply (PersistentList): List-like object of functions that are applied to the histogram
            during the processing step. See the :doc:`detector subsystem README </detectorPluginsReadme>`
            for more information.
        trendingObjects (PersistentList): List-like object of trending objects which operate on this
            histogram. See the :doc:`detector subsystem and trending README </detectorPluginsReadme>`
            for more information.
    """
    def __init__(self, histName, histList = None, prettyName = None):
        # Replace any slashes with underscores to ensure that it can be used safely as a filename
        #histName = histName.replace("/", "_")
        self.histName = histName
        # Fall back to the hist name when a display name isn't provided
        self.prettyName = prettyName if prettyName is not None else self.histName
        self.histList = histList
        self.information = persistent.mapping.PersistentMapping()

        # The underlying histogram and its properties are filled in later
        self.hist = None
        self.histType = None
        self.drawOptions = ""
        # Canvas where the hist may be plotted, along with additional content
        self.canvas = None

        # Projections from an available histogram to a new derived histogram
        self.projectionFunctionsToApply = persistent.list.PersistentList()
        # Functions applied to the histogram each time it is processed
        self.functionsToApply = persistent.list.PersistentList()
        # Trending objects which use this histogram
        self.trendingObjects = persistent.list.PersistentList()

    def __repr__(self):
        """ Representation of the object. """
        template = "{}(histName = {histName}, histList = {histList}, prettyName = {prettyName})"
        return template.format(self.__class__.__name__,
                               histName = self.histName,
                               histList = self.histList,
                               prettyName = self.prettyName)

    def __str__(self):
        """ Print many of the elements of the object. """
        template = "{}: histName = {histName}, histList = {histList}, prettyName = {prettyName}," \
                   " information: {information}, hist: {hist}, histType: {histType}, drawOptions: {drawOptions}," \
                   " canvas: {canvas}, projectionFunctionsToApply: {projectionFunctionsToApply}," \
                   " functionsToApply: {functionsToApply}"
        return template.format(self.__class__.__name__,
                               histName = self.histName,
                               histList = self.histList,
                               prettyName = self.prettyName,
                               information = self.information,
                               hist = self.hist,
                               histType = self.histType,
                               drawOptions = self.drawOptions,
                               canvas = self.canvas,
                               projectionFunctionsToApply = self.projectionFunctionsToApply,
                               functionsToApply = self.functionsToApply)
[docs] def retrieveHistogram(self, ROOT, fIn = None, trending = None): """ Retrieve the histogram from the given file or trending container. This function can retrieve a single histogram from a file, multiple hists from a file to create a stack (based on the hist names in ``histList``), or a single trending histogram stored in the collection of trending objects. Args: ROOT (ROOT): ROOT module. Passed into this object so this module doesn't need to directly depend on importing ROOT. fIn (ROOT.TFile): File in which the histogram(s) is stored. Default: ``None``. trending (trendingContainer): Contains the trending objects, including the trending histogram which is represented in this histogram container. It is the source of the histogram, and therefore similar to the input ROOT file. Default: ``None``. Returns: bool: True if the histogram was successfully retrieved. """ returnValue = True if fIn: if self.histList is not None: if len(self.histList) > 1: self.hist = ROOT.THStack(self.histName, self.histName) for name in self.histList: logger.debug("HistName in list: {name}".format(name = name)) self.hist.Add(fIn.GetKey(name).ReadObj()) self.drawOptions += "nostack" # TODO: Allow for further configuration of THStack, like TLegend and such elif len(self.histList) == 1: # Projective histogram histName = next(iter(self.histList)) logger.debug("Retrieving histogram {} for projection!".format(histName)) # Clone the histogram so restricted ranges don't propagate to other uses of this hist tempHist = fIn.GetKey(histName) if tempHist: self.hist = tempHist.ReadObj().Clone("{}_temp".format(histName)) else: returnValue = False else: logger.warning("histList for hist {} is defined, but is empty".format(self.histName)) returnValue = False else: logger.debug("HistName: {histName}".format(histName = self.histName)) tempHist = fIn.GetKey(self.histName) if tempHist: self.hist = tempHist.ReadObj() else: returnValue = False elif trending: # Retrieve the trending histogram from the 
collection of trending objects. returnValue = False # Not particularly efficient, but it's straightforward. for subsystemName, subsystem in iteritems(trending.trendingObjects): for name, trendingObject in iteritems(subsystem): if self.histName in trendingObject.hist.histName: # Retrieve the graph and make it available in the trending histogram container trendingObject.retrieveHistogram() returnValue = True #self.hist = trending.trendingObjects[subsystemName][self.histName].trendingHist else: logger.warning("Unable to retrieve histogram {}".format(self.histName)) returnValue = False return returnValue