#!/usr/bin/env python
""" Classes that define the processing structure of Overwatch.
Classes that define the structure of processing. This information can be created and processed,
or read from file.
Note:
For the ``__repr__`` and ``__str__`` methods defined here, they can throw ``KeyError`` for class attributes
if these methods rely on ``__dict__`` and the objects have just been loaded from ZODB. Presumably, ``__dict__``
doesn't cause ZODB to fully load the object. To work around this issue, any methods using ``__dict__`` first
call some attribute (ideally, something simple) to ensure that the object is fully loaded. The result of that call
is ignored.
.. codeauthor:: Raymond Ehlers <raymond.ehlers@cern.ch>, Yale University
"""
from __future__ import print_function
from __future__ import absolute_import
from future.utils import iteritems
from future.utils import itervalues
# Database
import BTrees.OOBTree
import persistent
import os
import pendulum
import logging
# Setup logger
logger = logging.getLogger(__name__)
from ..base import utilities
from ..base import config
# Load the processing configuration once, when the module is imported. ``processingParameters`` is
# indexed by option name by the classes below (for example, ``processingParameters["dirPrefix"]``).
# NOTE(review): ``filesRead`` is presumably the list of configuration files which were read - confirm
# against ``config.readConfig``. It is not used within the visible part of this module.
(processingParameters, filesRead) = config.readConfig(config.configurationType.processing)
class runContainer(persistent.Persistent):
    """ Object to represent a particular run.

    It stores run level information, as well as the subsystems, which in turn contain the corresponding
    event information (histogram groups, histograms, etc).

    Note that files are *not* considered event level information because the files correspond to an
    individual subsystem. Furthermore, in rare cases, different subsystems may have different numbers
    of files within an individual run. Consequently, it is cleaner for each subsystem to track its
    own files.

    To allow the object to be reconstructed from scratch, the HLT mode is stored by writing a YAML file
    in the corresponding run directory. This file is referred to as the "run info" file. Additional
    properties could also be written to this file to avoid the loss of transient information.

    Note:
        The run info file is read and written on object construction. It will only be read if the
        HLT mode is not set.

    Args:
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``.
        fileMode (bool): If true, the run data was collected in cumulative mode. See the
            :doc:`processing README </processingReadme>` for further information.
        hltMode (str): String containing the HLT mode used for the run.

    Attributes:
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``
        runNumber (int): Run number extracted from the ``runDir``.
        prettyName (str): Reformatting of the ``runDir`` for improved readability.
        mode (bool): If true, the run data was collected in cumulative mode. See the
            :doc:`processing README </processingReadme>` for further information. Set via the
            ``fileMode`` argument.
        subsystems (BTree): Dict-like object which will contain all of the subsystem containers in
            an event. The key is the corresponding subsystem three letter name.
        hltMode (str): Mode the HLT operated in for this run. Valid HLT modes are "B", "C", "E", and "U".
            Further information on the various modes is in the :doc:`processing README </processingReadme>`.
            Default: ``None`` (which will be converted to "U", for "unknown").
    """
    def __init__(self, runDir, fileMode, hltMode = None):
        self.runDir = runDir
        self.runNumber = int(runDir.replace("Run", ""))
        self.prettyName = "Run {runNumber}".format(runNumber = self.runNumber)
        self.mode = fileMode
        self.subsystems = BTrees.OOBTree.BTree()
        self.hltMode = hltMode

        # Try to retrieve the HLT mode from the run info file if it was not passed
        runDirectory = os.path.join(processingParameters["dirPrefix"], self.runDir)
        if not hltMode:
            self.hltMode = utilities.retrieveHLTModeFromStoredRunInfo(runDirectory = runDirectory)

        # Write run information
        # NOTE(review): The ``hltMode`` argument (possibly ``None``) is written here rather than the
        #               retrieved ``self.hltMode``. Confirm that ``writeRunInfoToFile`` handles this
        #               as intended.
        utilities.writeRunInfoToFile(runDirectory = runDirectory, hltMode = hltMode)

    def __repr__(self):
        """ Representation of the object. """
        # The attributes are accessed directly (rather than through ``__dict__``) so that an object
        # which was just loaded from ZODB is fully loaded. See the note at the top of the module.
        return "{}(runDir = {runDir}, fileMode = {mode}, hltMode = {hltMode})".format(self.__class__.__name__,
                                                                                     runDir = self.runDir,
                                                                                     mode = self.mode,
                                                                                     hltMode = self.hltMode)

    def __str__(self):
        """ Print many of the elements of the object. """
        return "{}: runDir: {runDir}, runNumber: {runNumber}, prettyName: {prettyName}, fileMode: {mode}," \
               " subsystems: {subsystems}, hltMode: {hltMode}".format(self.__class__.__name__,
                                                                      runDir = self.runDir,
                                                                      runNumber = self.runNumber,
                                                                      prettyName = self.prettyName,
                                                                      mode = self.mode,
                                                                      subsystems = list(self.subsystems.keys()),
                                                                      hltMode = self.hltMode)

    def isRunOngoing(self):
        """ Checks if a run is ongoing.

        The ongoing run check is performed by checking for a new file in any of the subsystems.
        If any of them has just received a new file, then the run is ongoing. Otherwise, the run
        is considered to be ongoing if the most recent file is less than five minutes old.

        Note:
            If ``subsystem.newFile`` is false, this is not a sufficient condition to say that
            the run has ended. This is because ``newFile`` will be set to false if the subsystem
            didn't have a file in the most recent processing run, even if the run is still
            ongoing. This can happen for many reasons, including if the processing is executed
            more frequently than the data transfer rate or receiver request rate, for example.
            However, if ``newFile`` is true, then it is sufficient to know that the run is ongoing.

        Args:
            None
        Returns:
            bool: True if the run is ongoing.
        """
        try:
            # A new file in any subsystem is sufficient, so nothing else needs to be checked.
            if any(subsystem.newFile is True for subsystem in itervalues(self.subsystems)):
                return True

            # We haven't found a new file, so we check the time stamps.
            logger.debug("Checking timestamps for whether the run in ongoing.")
            minutesSinceLastTimestamp = self.minutesSinceLastTimestamp()
            logger.debug("{minutesSinceLastTimestamp} minutes since the last timestamp.".format(minutesSinceLastTimestamp = minutesSinceLastTimestamp))
            # Compare the unix timestamps with a five minute buffer period.
            # This buffer time is arbitrarily selected, but the value is motivated by a balance to ensure
            # that a missed file doesn't cause the run to appear over, while also not claiming that the
            # run continues much longer than it actually does.
            # A negative value is the "no timestamp available" sentinel, which must not be interpreted
            # as a very recent file.
            return 0 <= minutesSinceLastTimestamp < 5
        except KeyError:
            return False

    def minutesSinceLastTimestamp(self):
        """ Determine the time since the last file timestamp in minutes.

        Subsystems which have not yet stored any files are skipped.

        Args:
            None.
        Returns:
            Minutes since the timestamp of the most recent file. ``-1`` is returned if no file
                timestamp could be determined.
        """
        timeSinceLastTimestamp = -1
        try:
            mostRecentTimestamp = -1
            for subsystem in itervalues(self.subsystems):
                try:
                    newestFile = subsystem.files[subsystem.files.keys()[-1]]
                except IndexError:
                    # This subsystem has no files yet, so it cannot contribute a timestamp.
                    continue
                if newestFile.fileTime > mostRecentTimestamp:
                    mostRecentTimestamp = newestFile.fileTime

            # Only calculate a difference if we actually found a timestamp. Otherwise, we would
            # measure the time since the (meaningless) sentinel value.
            if mostRecentTimestamp >= 0:
                # The timestamps of the files are set in Geneva, so we need to construct the timestamp in Geneva
                # to compare against. The proper timezone for this is "Europe/Zurich".
                geneva = pendulum.from_timestamp(mostRecentTimestamp, tz = "Europe/Zurich")
                now = pendulum.now()
                # Return in minutes
                timeSinceLastTimestamp = now.diff(geneva).in_minutes()
        except KeyError:
            # If there is a KeyError somewhere, we just ignore it and pass back the default value.
            pass

        return timeSinceLastTimestamp

    def startOfRunTimeStamp(self):
        """ Provides the start of the run time stamp in a format suitable for display.

        This timestamp is determined by looking at the timestamp of the last subsystem
        (arbitrarily selected) that is available in the run. The formatting is delegated to
        the ``prettyPrintUnixTime(...)`` method of that subsystem.

        Args:
            None
        Returns:
            str: Start of run time stamp formatted in an appropriate manner for display. ``False``
                is returned if the run doesn't contain any subsystems.
        """
        try:
            # We just take the last subsystem in a given run. Any will do
            lastSubsystem = self.subsystems[self.subsystems.keys()[-1]]
        except (KeyError, IndexError):
            # ``IndexError`` covers a run which doesn't contain any subsystems yet.
            return False

        return lastSubsystem.prettyPrintUnixTime(lastSubsystem.startOfRun)
class subsystemContainer(persistent.Persistent):
    """ Object to represent a particular subsystem (detector).

    This is the main container for much of the information that is relevant for processing. It holds
    the subsystem level information, including the histograms, the histogram groups, and the file
    information. The file storage layout implemented through this class is described in the
    :doc:`processing README </processingReadme>`.

    Note:
        A number of directories are checked for and created when the object is initialized.

    Args:
        subsystem (str): The current subsystem in the form of a three letter, all capital name (ex. ``EMC``).
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``
        startOfRun (int): Start of the run in unix time.
        endOfRun (int): End of the run in unix time.
        showRootFiles (bool): True if the ROOT files should be made accessible through the run list.
            Default: ``False``.
        fileLocationSubsystem (str): Subsystem name of where the files are actually located. If a subsystem
            has specific data files then this is just equal to the `subsystem`. However, if it relies on
            files inside of another subsystem (such as those from the HLT subsystem receiver), then this
            variable is equal to that subsystem name. Default: ``None``, in which case the location is
            detected from whether the subsystem directory exists, falling back to ``HLT``.

    Attributes:
        subsystem (str): The current subsystem in the form of a three letter, all capital name (ex. ``EMC``).
        showRootFiles (bool): True if the ROOT files should be made accessible through the run list.
        fileLocationSubsystem (str): Subsystem name of where the files are actually located. If a subsystem has
            specific data files then this is just equal to the `subsystem`. However, if it relies on files inside
            of another subsystem, then this variable is equal to that subsystem name.
        files (BTree): Dict-like object which describes subsystem ROOT files. Unix time of a given file is the key
            and a file container for that file is the value.
        timeSlices (PersistentMapping): Dict-like object which describes subsystem time slices. A UUID is the dict
            key (so they can be uniquely identified), while a timeSliceContainer with the corresponding time slice
            properties is the value.
        combinedFile (fileContainer): File container corresponding to the combined file.
        baseDir (str): Path to the base storage directory for the subsystem. Of the form ``Run123456/SYS``.
        imgDir (str): Path to the image storage directory for the subsystem. Of the form ``Run123456/SYS/img``.
        jsonDir (str): Path to the json storage directory for the subsystem. Of the form ``Run123456/SYS/json``.
        startOfRun (int): Start of the run in unix time.
        endOfRun (int): End of the run in unix time.
        runLength (int): Length of the run in minutes.
        histGroups (PersistentList): List-like object of histogram groups, which are used to classify similar histograms.
        histsInFile (BTree): Dict-like object of all histograms that are in a particular file. Keys are the histogram name,
            while the values are ``histogramContainer`` objects which contain the histogram. Hists should usually be
            accessed through the hist groups, but this provides direct access when necessary early in processing.
        histsAvailable (BTree): Dict-like object containing all histograms that are available, including those in a particular
            file and those that are created during processing. Newly created hists should be stored in this dict. Keys are
            histogram names, while values are ``histogramContainer`` objects which contain the histogram.
        hists (BTree): Dict-like object which contains all histograms that should be processed.
            After initial creation, this should be the definitive source of histograms for processing and display.
            Keys are histogram names, while values are ``histogramContainer`` objects which contain the histogram.
        newFile (bool): True if we received a new file, which will trigger reprocessing. This flag should only be
            changed when beginning processing the next time. To be explicit, if a subsystem just received a new file
            and it was processed, this flag should only be changed to ``False`` after the next processing iteration
            begins. This allows the status of the run (determined through the subsystem) to be displayed in the web app.
            Default: True because if the subsystem is being created, we likely need reprocessing.
        nEvents (int): Number of events in the subsystem. Processing will look for a histogram that contains ``events``
            in the name and attempt to extract the number of events based on the number of entries. Should not be used
            unless the subsystem explicitly includes a histogram with the number of events. Default: 1.
        processingOptions (PersistentMapping): Implemented by the subsystem to note options used during
            standard processing. The subsystem processing options can vary when processing a time slice,
            so storing the options allow us to return to the standard options when performing a full processing.
            Keys are the option names as string, while values are their corresponding values.
    """
    def __init__(self, subsystem, runDir, startOfRun, endOfRun, showRootFiles = False, fileLocationSubsystem = None):
        self.subsystem = subsystem
        self.showRootFiles = showRootFiles

        # Determine where the data files for this subsystem are actually stored.
        if fileLocationSubsystem is not None:
            self.fileLocationSubsystem = fileLocationSubsystem
        else:
            # The location was not specified, so detect it automatically. The existence of the subsystem
            # directory is used as a proxy for whether the subsystem has its own data. If it doesn't,
            # then it depends on the HLT data.
            # NOTE: This detection works, but it isn't so flexible.
            subsystemPath = os.path.join(processingParameters["dirPrefix"], runDir, subsystem)
            self.fileLocationSubsystem = self.subsystem if os.path.exists(subsystemPath) else "HLT"

        if self.showRootFiles is True and self.subsystem != self.fileLocationSubsystem:
            logger.info("It is requested to show ROOT files for subsystem {subsystem}, but the subsystem does not have specific data files. Using HLT data files!".format(subsystem = subsystem))

        # File related information. Be certain to set these after the subsystem has been created!
        # ``files`` contains all files for this particular run.
        self.files = BTrees.OOBTree.BTree()
        self.timeSlices = persistent.mapping.PersistentMapping()
        # There is only a single combined file, so a dict is not needed.
        self.combinedFile = None

        # Storage directories
        self.setupDirectories(runDir)

        # Run times, with the run length being stored in minutes.
        self.startOfRun = startOfRun
        self.endOfRun = endOfRun
        self.runLength = self.calculateRunLength()

        # Histograms
        self.histGroups = persistent.list.PersistentList()
        # Direct access to the hists in the file. Usually, they should be accessed through the groups.
        self.histsInFile = BTrees.OOBTree.BTree()
        # Every hist, regardless of whether it is from the file or was created during processing.
        self.histsAvailable = BTrees.OOBTree.BTree()
        # The hists which should actually be used.
        self.hists = BTrees.OOBTree.BTree()

        # A new file leads to reprocessing. A newly created subsystem likely needs to be processed,
        # so the flag starts out as true.
        self.newFile = True
        # The number of events is a subsystem dependent quantity which needs explicit support, although
        # the processing will attempt to determine it.
        self.nEvents = 1
        # Options noted by the subsystem during processing
        self.processingOptions = persistent.mapping.PersistentMapping()

    def calculateRunLength(self, startOfRun = None, endOfRun = None):
        """ Helper function to calculate the run length.

        Note:
            The run length is defined in minutes.

        Args:
            startOfRun (int): Start of the run in unix time. Default: ``None``. If not specified,
                the ``startOfRun`` stored in the subsystem will be used.
            endOfRun (int): End of the run in unix time. Default: ``None``. If not specified,
                the ``endOfRun`` stored in the subsystem will be used.
        Returns:
            int: The calculated run length in minutes.
        """
        start = self.startOfRun if startOfRun is None else startOfRun
        end = self.endOfRun if endOfRun is None else endOfRun
        # Convert from seconds to (whole) minutes
        return (end - start) // 60

    def setupDirectories(self, runDir):
        """ Helper function to setup the subsystem directories.

        Defines the base, img, and JSON directories, and creates the img and JSON directories
        under the ``dirPrefix`` if they don't already exist.

        Args:
            runDir (str): String containing the run number. For an example run 123456, it should be
                formatted as ``Run123456``
        Returns:
            None. However, it sets the ``baseDir``, ``imgDir``, and ``jsonDir`` properties of the ``subsystemContainer``.
        """
        # The base directory is determined by the subsystem which actually contains the files!
        self.baseDir = os.path.join(runDir, self.fileLocationSubsystem)
        self.imgDir = os.path.join(self.baseDir, "img")
        self.jsonDir = os.path.join(self.baseDir, "json")

        # Create any of the directories which are missing
        for directory in (self.imgDir, self.jsonDir):
            fullPath = os.path.join(processingParameters["dirPrefix"], directory)
            if not os.path.exists(fullPath):
                os.makedirs(fullPath)

    def __repr__(self):
        """ Representation of the object. """
        template = "{}(subsystem = {subsystem}, runDir = {runDir}, startOfRun = {startOfRun}," \
                   " endOfRun = {endOfRun}, showRootFiles = {showRootFiles}," \
                   " fileLocationSubsystem = {fileLocationSubsystem})"
        # The run directory isn't stored directly, so it is recovered from the base directory.
        return template.format(self.__class__.__name__,
                               subsystem = self.subsystem,
                               runDir = os.path.dirname(self.baseDir),
                               startOfRun = self.startOfRun,
                               endOfRun = self.endOfRun,
                               showRootFiles = self.showRootFiles,
                               fileLocationSubsystem = self.fileLocationSubsystem)

    def __str__(self):
        """ Print many of the elements of the object. """
        template = "{}: subsystem: {subsystem}, fileLocationSubsystem: {fileLocationSubsystem}," \
                   " showRootFiles: {showRootFiles}, startOfRun: {startOfRun}, endOfRun: {endOfRun}," \
                   " newFile: {newFile}, hists: {hists}"
        return template.format(self.__class__.__name__,
                               subsystem = self.subsystem,
                               fileLocationSubsystem = self.fileLocationSubsystem,
                               showRootFiles = self.showRootFiles,
                               startOfRun = self.startOfRun,
                               endOfRun = self.endOfRun,
                               newFile = self.newFile,
                               hists = list(self.hists.keys()))

    @staticmethod
    def prettyPrintUnixTime(unixTime):
        """ Converts the given time stamp into an appropriate manner ("pretty") for display.

        The time is returned in the format: "Tuesday, 6 Nov 2018 20:55:10". This function is
        mainly needed in Jinja templates where arbitrary functions are not allowed.

        Note:
            The time is displayed in the CERN time zone ("Europe/Zurich"), so the conversion
            to that time zone is performed here.

        Args:
            unixTime (int): Unix time to be converted.
        Returns:
            str: The time stamp converted into an appropriate manner for display.
        """
        return pendulum.from_timestamp(unixTime, tz = "Europe/Zurich").format("dddd, D MMM YYYY HH:mm:ss")

    def resetContainer(self):
        """ Clear the stored hist information so we can recreate (reprocess) the subsystem.

        Without resetting the container, reprocessing doesn't fully test the processing functions,
        which are skipped if these list- and dict-like hist objects have entries.

        Args:
            None
        Returns:
            None
        """
        # Empty the list in place so that the same persistent object is retained.
        del self.histGroups[:]
        for histContainers in (self.histsInFile, self.histsAvailable, self.hists):
            histContainers.clear()
class timeSliceContainer(persistent.Persistent):
    """ Time slice information container.

    Contains information about a time slice request, including the time ranges and the files involved.
    These values are required to uniquely describe a time slice.

    Args:
        minUnixTimeRequested (int): Minimum requested unix time. This is the first time stamp to be included
            in the time slice.
        maxUnixTimeRequested (int): Maximum requested unix time. This is the last time stamp to be included
            in the time slice.
        minUnixTimeAvailable (int): Minimum unix time of the run.
        maxUnixTimeAvailable (int): Maximum unix time of the run.
        startOfRun (int): Unix time of the start of the run.
        filesToMerge (list): List of fileContainer objects which need to be merged to create the time slice.
        optionsHash (str): SHA1 hash of the processing options used to construct the time slice.

    Attributes:
        minUnixTimeRequested (int): Minimum requested unix time. This is the first time stamp to be included
            in the time slice.
        maxUnixTimeRequested (int): Maximum requested unix time. This is the last time stamp to be included
            in the time slice.
        minUnixTimeAvailable (int): Minimum unix time of the run.
        maxUnixTimeAvailable (int): Maximum unix time of the run.
        startOfRun (int): Unix time of the start of the run.
        filesToMerge (list): List of fileContainer objects which need to be merged to create the time slice.
        optionsHash (str): SHA1 hash of the processing options used to construct the time slice. This hash
            is used for caching by comparing the processing options for a new time slice request with those
            already processed. If the hashes are the same, we can directly return the already processed result.
        filenamePrefix (str): Filename prefix for the timeSlice file. It is built from the minimum and maximum
            *available* unix times, as well as the options hash.
        filename (fileContainer): File container for the timeSlice file.
        processingOptions (PersistentMapping): Implemented by the time slice container to note options used
            during standard processing. The time slice processing options can vary when compared to standard
            subsystem processing, so storing the options allow us to apply the custom time slice options.
    """
    def __init__(self, minUnixTimeRequested, maxUnixTimeRequested, minUnixTimeAvailable, maxUnixTimeAvailable, startOfRun, filesToMerge, optionsHash):
        # Requested times
        self.minUnixTimeRequested = minUnixTimeRequested
        self.maxUnixTimeRequested = maxUnixTimeRequested
        # Available times
        self.minUnixTimeAvailable = minUnixTimeAvailable
        self.maxUnixTimeAvailable = maxUnixTimeAvailable
        # Start of run is also in unix time
        self.startOfRun = startOfRun
        self.optionsHash = optionsHash

        # File containers of the files to merge
        self.filesToMerge = filesToMerge

        # Filename prefix for saving out files
        self.filenamePrefix = "timeSlice.{}.{}.{}".format(self.minUnixTimeAvailable, self.maxUnixTimeAvailable, self.optionsHash)

        # Create filename
        self.filename = fileContainer(self.filenamePrefix + ".root")

        # Processing options
        # Implemented by the detector to note how it was processed that may be changed during time slice processing
        # This allows us return full processing when appropriate
        # Same as the type of options implemented in the subsystemContainer!
        self.processingOptions = persistent.mapping.PersistentMapping()

    def __repr__(self):
        """ Representation of the object. """
        # The attributes are accessed directly (rather than through ``__dict__``) so that an object
        # which was just loaded from ZODB is fully loaded. See the note at the top of the module.
        # The format string is closed with ")" so that the representation is balanced.
        return "{}(minUnixTimeRequested = {minUnixTimeRequested}, maxUnixTimeRequested = {maxUnixTimeRequested}," \
               " minUnixTimeAvailable = {minUnixTimeAvailable}, maxUnixTimeAvailable = {maxUnixTimeAvailable}," \
               " startOfRun = {startOfRun}, filesToMerge = {filesToMerge}," \
               " optionsHash = {optionsHash})".format(self.__class__.__name__,
                                                      minUnixTimeRequested = self.minUnixTimeRequested,
                                                      maxUnixTimeRequested = self.maxUnixTimeRequested,
                                                      minUnixTimeAvailable = self.minUnixTimeAvailable,
                                                      maxUnixTimeAvailable = self.maxUnixTimeAvailable,
                                                      startOfRun = self.startOfRun,
                                                      filesToMerge = self.filesToMerge,
                                                      optionsHash = self.optionsHash)

    def __str__(self):
        """ Print many of the elements of the object. """
        return "{}: minUnixTimeRequested = {minUnixTimeRequested}, maxUnixTimeRequested = {maxUnixTimeRequested}," \
               " minUnixTimeAvailable = {minUnixTimeAvailable}, maxUnixTimeAvailable = {maxUnixTimeAvailable}," \
               " filenamePrefix: {filenamePrefix}, startOfRun = {startOfRun}, filesToMerge = {filesToMerge}," \
               " optionsHash = {optionsHash}".format(self.__class__.__name__,
                                                     minUnixTimeRequested = self.minUnixTimeRequested,
                                                     maxUnixTimeRequested = self.maxUnixTimeRequested,
                                                     minUnixTimeAvailable = self.minUnixTimeAvailable,
                                                     maxUnixTimeAvailable = self.maxUnixTimeAvailable,
                                                     filenamePrefix = self.filenamePrefix,
                                                     startOfRun = self.startOfRun,
                                                     filesToMerge = self.filesToMerge,
                                                     optionsHash = self.optionsHash)

    def timeInMinutes(self, inputTime):
        """ Return the time from the input unix time to the start of the run in minutes.

        Args:
            inputTime (int): Unix time to be compared to the start of run time.
        Returns:
            int: Minutes from the start of run to the given time.
        """
        # Floor division converts the difference in seconds into whole minutes.
        return (inputTime - self.startOfRun) // 60

    def timeInMinutesRounded(self, inputTime):
        """ Return the time from the input unix time to start of the run in minutes, rounded to
        the nearest minute.

        Note:
            I believe this was created due to some float vs int issues in the Jinja templating
            system. Although the purpose of this function isn't entirely clear, it is kept for
            compatibility purposes.

        Args:
            inputTime (int): Unix time to be compared to the start of run time.
        Returns:
            int: Minutes from the start of run to the given time.
        """
        return round(self.timeInMinutes(inputTime))
class fileContainer(persistent.Persistent):
    """ File information container.

    This object wraps a ROOT filename, providing convenient access to relevant properties, such
    as the type of file (combined, timeSlice, standard), and the time stamp. This information
    is often stored in the filename itself, but extraction procedures vary for each file type.

    Note that it *does not* open the file itself - this is still the responsibility of the user.

    Args:
        filename (str): Filename of the corresponding file. This is expected to be the full path
            from the ``dirPrefix`` to the file.
        startOfRun (int): Start of the run in unix time. Default: ``None``. The default will lead
            to timeIntoRun being set to ``-1``. The default is most commonly used for time slices,
            where the start of run isn't so meaningful.

    Attributes:
        filename (str): Filename of the corresponding file. This is expected to be the full path
            from the ``dirPrefix`` to the file.
        combinedFile (bool): True if this file corresponds to a combined file. It is set to ``True``
            if "combined" is in the filename.
        timeSlice (bool): True if this file corresponds to a time slice. It is set to ``True`` if
            "timeSlice" is in the filename (and "combined" is not).
        fileTime (int): Unix time stamp of the file, extracted from the filename.
        timeIntoRun (int): Time in seconds from the start of the run to the file time. Depends on
            startOfRun being a valid time when the object was created. Otherwise, it is ``-1``.
    """
    def __init__(self, filename, startOfRun = None):
        self.filename = filename

        # Determine types of file. "combined" takes precedence over "timeSlice".
        self.combinedFile = "combined" in self.filename
        self.timeSlice = (not self.combinedFile) and ("timeSlice" in self.filename)

        # The combined file time will be the length of the run
        # The time slice will be the length of the time slice
        self.fileTime = utilities.extractTimeStampFromFilename(self.filename)
        if startOfRun:
            self.timeIntoRun = self.fileTime - startOfRun
        else:
            # Show a clearly invalid time, since timeIntoRun doesn't make much sense for a time slice
            self.timeIntoRun = -1

    def __repr__(self):
        """ Representation of the object. """
        # The start of run isn't stored, so it is reconstructed from the file time. If no start of run
        # was provided, then ``timeIntoRun`` holds the ``-1`` sentinel and the subtraction would
        # produce a meaningless value (``fileTime + 1``), so ``None`` is reported instead.
        if self.timeIntoRun == -1:
            startOfRun = None
        else:
            startOfRun = self.fileTime - self.timeIntoRun
        return "{}(filename = {filename}, startOfRun = {startOfRun})".format(self.__class__.__name__,
                                                                             filename = self.filename,
                                                                             startOfRun = startOfRun)

    def __str__(self):
        """ Print the elements of the object. """
        # The attributes are accessed directly (rather than through ``__dict__``) so that an object
        # which was just loaded from ZODB is fully loaded. See the note at the top of the module.
        return "{}: filename = {filename}, combinedFile: {combinedFile}, timeSlice: {timeSlice}," \
               " fileTime: {fileTime}, timeIntoRun: {timeIntoRun}".format(self.__class__.__name__,
                                                                          filename = self.filename,
                                                                          combinedFile = self.combinedFile,
                                                                          timeSlice = self.timeSlice,
                                                                          fileTime = self.fileTime,
                                                                          timeIntoRun = self.timeIntoRun)
class histogramGroupContainer(persistent.Persistent):
    """ Organizes similar histograms into groups for processing and display.

    Histograms groups are created by providing name substrings of histogram which should be included.
    The name substring is referred to as a ``groupSelectionPattern``. For example, if the pattern was
    "hello", all histograms containing "hello" would be selected. Additional properties related to
    groups, such as display information, are also stored.

    Args:
        prettyName (str): Readable name of the group.
        groupSelectionPattern (str): Pattern of the histogram names that will be selected. For example, if
            wanted to select histograms related to EMCal patch amplitude, we would make the pattern something
            like "PatchAmp". The pattern depends on the name of the histograms sent from the HLT.
        plotInGridSelectionPattern (str): Pattern which denotes whether the histograms should be plotted in
            a grid. ``plotInGrid`` is set based on whether this value is in ``groupSelectionPattern``. For
            example, in the EMCal, the ``plotInGridSelectionPattern`` is ``_SM``, since "SM" denotes a
            supermodule.

    Attributes:
        prettyName (str): Readable name of the group. Set via ``prettyName`` in the constructor.
        selectionPattern (str): Pattern of the histogram names that will be selected. Set via
            ``groupSelectionPattern`` in the constructor.
        plotInGridSelectionPattern (str): Pattern (substring) which denotes whether the histograms should be
            plotted in a grid.
        plotInGrid (bool): True when the histograms should be plotted in a grid.
        histList (PersistentList): List of histogram names that should be filled when the ``selectionPattern`` is matched.
    """
    def __init__(self, prettyName, groupSelectionPattern, plotInGridSelectionPattern = "DO NOT PLOT IN GRID"):
        self.prettyName = prettyName
        self.selectionPattern = groupSelectionPattern
        self.plotInGridSelectionPattern = plotInGridSelectionPattern
        self.histList = persistent.list.PersistentList()

        # Determined once here so that it is not necessary to check the pattern every time
        self.plotInGrid = self.plotInGridSelectionPattern in self.selectionPattern

    def __repr__(self):
        """ Representation of the object. """
        # The selection pattern is stored as ``selectionPattern``, so it must be passed explicitly under
        # the name of the constructor argument. Formatting with ``__dict__`` would raise a ``KeyError``.
        # Accessing the attributes directly also ensures that an object which was just loaded from ZODB
        # is fully loaded. See the note at the top of the module.
        return "{}(prettyName = {prettyName}, groupSelectionPattern = {groupSelectionPattern}," \
               " plotInGridSelectionPattern = {plotInGridSelectionPattern})".format(self.__class__.__name__,
                                                                                    prettyName = self.prettyName,
                                                                                    groupSelectionPattern = self.selectionPattern,
                                                                                    plotInGridSelectionPattern = self.plotInGridSelectionPattern)

    def __str__(self):
        """ Print the elements of the object. """
        # See the comment in ``__repr__`` regarding ``selectionPattern``.
        return "{}: prettyName = {prettyName}, groupSelectionPattern = {groupSelectionPattern}," \
               " plotInGridSelectionPattern = {plotInGridSelectionPattern}, histList: {histList}," \
               " plotInGrid: {plotInGrid}".format(self.__class__.__name__,
                                                  prettyName = self.prettyName,
                                                  groupSelectionPattern = self.selectionPattern,
                                                  plotInGridSelectionPattern = self.plotInGridSelectionPattern,
                                                  histList = self.histList,
                                                  plotInGrid = self.plotInGrid)
[docs]class histogramContainer(persistent.Persistent):
""" Histogram information container.
Organizes information about a particular histogram (or set of histograms). Manages functions that
process and otherwise modify the histogram, which are specified through the plugin system. The
container also manages plotting details.
Note:
The histogram container doesn't always have access to the underlying histogram. When constructing
the container, it is useful to have the histogram available to provide some information, but then
the histogram should not be needed until final processing is performed and the hist is plotted.
When this final step is reached, the histogram can be retrieved by ``retrieveHistogram()`` helper
function.
Args:
histName (str): Name of the histogram. Doesn't necessarily need to be the same as ``TH1.GetName()``.
histList (list): List of histogram names that should contribute to this container. Used for stacking
multiple histograms on onto one canvas. Default: None
prettyName (str): Name of the histogram that is appropriate for display. Default: ``None``, which
will lead to be it being set to ``histName``.
Attributes:
histName (str): Name of the histogram. Doesn't necessarily need to be the same as ``TH1.GetName()``.
prettyName (str): Name of the histogram that is appropriate for display.
histList (list): List of histogram names that should contribute to this container. Used for stacking
multiple histograms on onto one canvas. Default: None. See ``retrieveHistogram()`` for more
information on how this functionality is utilized.
information (PersistentMapping): Information that is extracted from the histogram that should be
stored persistently and displayed. This information will be displayed with the web app, with
the key shown as a clickable button, and the value information stored behind it.
hist (ROOT.TH1): The histogram which this container wraps.
histType (ROOT.TClass): Class of the histogram. For example, ``ROOT.TH1F``. Can be used for functions
that only apply to 2D hists, etc. It is stored separately from the histogram to allow for it to
be available even when the underlying histogram is not (as occurs while setting up but not yet
processing a histogram).
drawOptions (str): Draw options to be passed to ``TH1.Draw()`` when drawing the histogram.
canvas (ROOT.TCanvas): Canvas onto which the histogram will be plotted. Available after the histogram
has been classified (ie in processing functions).
projectionFunctionsToApply (PersistentList): List-like object of functions that perform projections
to the histogram that is represented by this container. See the :doc:`detector subsystem README </detectorPluginsReadme>`
for more information.
functionsToApply (PersistentList): List-like object of functions that are applied to the histogram
during the processing step. See the :doc:`detector subsystem README </detectorPluginsReadme>`
for more information.
trendingObjects (PersistentList): List-like object of trending objects which operate on this
histogram. See the :doc:`detector subsystem and trending README </detectorPluginsReadme>`
for more information.
"""
def __init__(self, histName, histList = None, prettyName = None):
    """ Set up an empty container for the histogram named ``histName``.

    Args:
        histName (str): Name of the histogram.
        histList (list): Names of the histograms which contribute to this container. Default: ``None``.
        prettyName (str): Display name of the histogram. Default: ``None``, in which case
            ``histName`` is used.
    """
    # NOTE: Sanitizing the name so that it is safe to use as a filename is currently disabled:
    #histName = histName.replace("/", "_")
    self.histName = histName
    # Fall back to the histogram name when no display name was provided
    self.prettyName = histName if prettyName is None else prettyName
    self.histList = histList
    # Extracted information which is stored persistently alongside the histogram
    self.information = persistent.mapping.PersistentMapping()
    # The histogram itself and its type are not known at construction time
    self.hist = None
    self.histType = None
    self.drawOptions = ""
    # Canvas onto which the hist (and any additional content) may be drawn
    self.canvas = None
    # Projection functions: derive a new histogram from an available histogram
    self.projectionFunctionsToApply = persistent.list.PersistentList()
    # Processing functions: applied to the histogram each time that it is processed
    self.functionsToApply = persistent.list.PersistentList()
    # Trending objects which make use of this histogram
    self.trendingObjects = persistent.list.PersistentList()
def __repr__(self):
    """ Constructor-like representation built from the name, hist list, and pretty name. """
    # Touch a simple attribute first so that ``__dict__`` is populated before it is used.
    # The result is intentionally ignored. See the note at the top of the module.
    self.histName
    className = self.__class__.__name__
    template = "{}(histName = {histName}, histList = {histList}, prettyName = {prettyName})"
    return template.format(className, **self.__dict__)
def __str__(self):
    """ Human readable summary listing most of the attributes stored in the container. """
    # Touch a simple attribute first so that ``__dict__`` is populated before it is used.
    # The result is intentionally ignored. See the note at the top of the module.
    self.histName
    className = self.__class__.__name__
    template = (
        "{}: histName = {histName}, histList = {histList}, prettyName = {prettyName},"
        " information: {information}, hist: {hist}, histType: {histType},"
        " drawOptions: {drawOptions}, canvas: {canvas},"
        " projectionFunctionsToApply: {projectionFunctionsToApply},"
        " functionsToApply: {functionsToApply}"
    )
    return template.format(className, **self.__dict__)
def retrieveHistogram(self, ROOT, fIn = None, trending = None):
    """ Retrieve the histogram from the given file or trending container.

    This function can retrieve a single histogram from a file, multiple hists from a file
    to create a stack (based on the hist names in ``histList``), or a single trending
    histogram stored in the collection of trending objects.

    Note:
        If both ``fIn`` and ``trending`` are passed, ``fIn`` takes precedence and ``trending``
        is ignored. If neither is usable, a warning is logged and ``False`` is returned.

    Args:
        ROOT (ROOT): ROOT module. Passed into this object so this module doesn't need
            to directly depend on importing ROOT.
        fIn (ROOT.TFile): File in which the histogram(s) is stored. Default: ``None``.
        trending (trendingContainer): Contains the trending objects, including the trending
            histogram which is represented in this histogram container. It is the source
            of the histogram, and therefore similar to the input ROOT file. Default: ``None``.
    Returns:
        bool: True if the histogram was successfully retrieved.
    """
    returnValue = True
    if fIn:
        if self.histList is None:
            # Standard case: a single histogram stored under the name of this container.
            logger.debug("HistName: {histName}".format(histName = self.histName))
            tempHist = fIn.GetKey(self.histName)
            if tempHist:
                self.hist = tempHist.ReadObj()
            else:
                returnValue = False
        elif len(self.histList) > 1:
            # Stack of multiple histograms. All of the contributions are read before the stack
            # is built so that a missing histogram is reported as a failed retrieval (as in the
            # other branches) instead of raising when calling ``ReadObj()`` on a missing key.
            contributions = []
            for name in self.histList:
                logger.debug("HistName in list: {name}".format(name = name))
                tempHist = fIn.GetKey(name)
                if not tempHist:
                    logger.warning("Unable to find histogram {} which contributes to the stack {}".format(name, self.histName))
                    returnValue = False
                    continue
                contributions.append(tempHist.ReadObj())

            if returnValue:
                self.hist = ROOT.THStack(self.histName, self.histName)
                for contribution in contributions:
                    self.hist.Add(contribution)
                # The draw options are stored on the container, so only add the option once.
                # Otherwise, it would be appended again on every retrieval.
                if "nostack" not in self.drawOptions:
                    self.drawOptions += "nostack"
                # TODO: Allow for further configuration of THStack, like TLegend and such
        elif len(self.histList) == 1:
            # Projective histogram
            histName = next(iter(self.histList))
            logger.debug("Retrieving histogram {} for projection!".format(histName))
            # Clone the histogram so restricted ranges don't propagate to other uses of this hist
            tempHist = fIn.GetKey(histName)
            if tempHist:
                self.hist = tempHist.ReadObj().Clone("{}_temp".format(histName))
            else:
                returnValue = False
        else:
            logger.warning("histList for hist {} is defined, but is empty".format(self.histName))
            returnValue = False
    elif trending:
        # Retrieve the trending histogram from the collection of trending objects.
        returnValue = False
        # Not particularly efficient, but it's straightforward.
        for subsystemName, subsystem in iteritems(trending.trendingObjects):
            for name, trendingObject in iteritems(subsystem):
                # NOTE(review): This is a substring match rather than an equality check, and the
                #               search continues after a match - confirm that both are intended.
                if self.histName in trendingObject.hist.histName:
                    # Retrieve the graph and make it available in the trending histogram container
                    trendingObject.retrieveHistogram()
                    returnValue = True
    else:
        logger.warning("Unable to retrieve histogram {}".format(self.histName))
        returnValue = False

    return returnValue