Source code for overwatch.webApp.validation

#!/usr/bin/env python

""" Contains validation functions.

These functions are important to ensure that only valid values are passed to the processing functions.
Validation could likely be improved by moving WTForms, which Overwatch already depends upon for CSRF
protection.

.. codeauthor:: Raymond Ehlers <raymond.ehlers@cern.ch>, Yale University
"""

# General
import json
from flask import request
# Used to parse GET parameters
try:
    import urllib.parse as urlparse
except ImportError:
    import urlparse

# Config
from ..base import config
(serverParameters, filesRead) = config.readConfig(config.configurationType.webApp)

# Logging
import logging
# Setup logger
logger = logging.getLogger(__name__)

[docs]def validateLoginPostRequest(request):
    """ Validates the login POST request.

    Note:
        The error format is different here. Instead of a list in a dict, we simply have a string.

    Args:
        request (Flask.request): The request object from Flask.
    Return
        tuple: (errorValue, username, password), where errorValue (str) contains the error that
            may have occurred, username (str) is the username extracted from POST request, and
            password (str) is the password extracted from POST request.
    """
    errorValue = None
    try:
        # We enforce the type as as string here
        username = request.form.get("username", type=str)
        password = request.form.get("password", type=str)
    except KeyError as e:
        errorValue = "Key error in " + e.args[0] + ". Please enter a username and password in the form."

    return (errorValue, username, password)

[docs]def validateTimeSlicePostRequest(request, runs):
    """ Validates the time slice POST request.

    The return tuple contains the validated values. The error value should always be checked first
    before using the other return values (they will be safe, but may not be meaningful).

    Warning:
        If an error occurs in determining the run or subsystem, we cannot retrieve the rest of the
        information necessary to validate the request, so the rest of the values in the return tuple are
        set to ``None``.

    Note:
        For the error format in ``errorValue``, see the :doc:`web app README </webAppReadme>`.

    Note:
        The listed args (after the first two) are provided through the flask ``request.form`` dictionary.

    Args:
        request (Flask.request): The request object from Flask.
        runs (BTree): Dict-like object which stores all run, subsystem, and hist information. Keys are the
            in the ``runDir`` format ("Run123456"), while the values are ``runContainer`` objects.
        minTime (float): Minimum time for the time slice.
        maxTime (float): Maximum time for the time slice.
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``.
        subsystemName (str): The current subsystem in the form of a three letter, all capital name (ex. ``EMC``).
        scaleHists (str): True if the hists should be scaled by the number of events. Converted from string to bool.
        hotChannelThreshold (int): Value of the hot channel threshold.
        histGroup (str): Name of the requested hist group. It is fine for it to be an empty string.
        histName (str): Name of the requested histogram. It is fine for it to be an empty string.
    Returns:
        tuple: (errorValue, minTime, maxTime, runDir, subsystemName, scrollAmount) where errorValue (dict)
            containers any possible errors, minTime (float) is the minimum time for the time slice,
            maxTime (float) is the maximum time for the time slice, runDir (str) is the run dir formatted
            string for which the time slice should be performed, subsystemName (str) is the current subsystem
            in the form of a three letter, all capital name (ex. ``EMC``), and scrollAmount (float) is the
            amount to scroll down the page to return to precisely where the user was previously.
    """
    error = {}
    try:
        # Enforce the particular types via ``get(...)``.
        minTime = request.form.get("minTime", -1, type=float)
        maxTime = request.form.get("maxTime", None, type=float)
        runDir = request.form.get("runDir", None, type=str)
        subsystemName = request.form.get("subsystem", None, type=str)
        scaleHists = request.form.get("scaleHists", False, type=str)
        hotChannelThreshold = request.form.get("hotChannelThreshold", -1, type=int)
        histGroup = convertRequestToStringWhichMayBeEmpty("histGroup", request.form)
        histName = convertRequestToStringWhichMayBeEmpty("histName", request.form)
        # Will be set below, but we define it here so that we have valid return values.
        inputProcessingOptions = {}
    # See: https://stackoverflow.com/a/23139085
    except KeyError as e:
        # Format is:
        # errors = {'hello2': ['world', 'world2'], 'hello': ['world', 'world2']}
        # See: https://stackoverflow.com/a/2052206
        error.setdefault("keyError", []).append("Key error in " + e.args[0])

    # Validate values based on available runs.
    try:
        # Retrieve run
        if runDir in runs.keys():
            run = runs[runDir]
        else:
            error.setdefault("Run Dir", []).append("Run dir {runDir} is not available in runs!".format(runDir = runDir))
            # Invalidate and we cannot continue
            return (error, None, None, None, None, None, None, None, None)

        # Retrieve subsystem
        if subsystemName in run.subsystems.keys():
            subsystem = run.subsystems[subsystemName]
        else:
            error.setdefault("subsystem", []).append("Subsystem name {subsystemName} is not available in {prettyName}!".format(subsystemName = subsystemName, prettyName = run.prettyName))
            # Invalidate and we cannot continue
            return (error, None, None, None, None, None, None, None, None)

        # Check times
        if minTime < 0:
            error.setdefault("minTime", []).append("{minTime} less than 0!".format(minTime = minTime))
        if maxTime > subsystem.runLength:
            error.setdefault("maxTime", []).append("Max time of {maxTime} greater than the run length of {runLength}".format(maxTime = maxTime, runLength = subsystem.runLength))
        if minTime > maxTime:
            error.setdefault("minTime", []).append("minTime {minTime} is greater than maxTime {maxTime}".format(minTime = minTime, maxtime = maxTime))

        # Validate histGroup and histName
        # NOTE: It could be valid for both to be None!
        validateHistGroupAndHistName(histGroup, histName, subsystem, run, error)

        # Processing options
        # Ensure scaleHists is a bool
        if scaleHists is not False:
            scaleHists = True
        inputProcessingOptions["scaleHists"] = scaleHists

        # Check hot channel threshold
        # NOTE: The max hot channel threshold (hotChannelThreshold) is also defined here!
        if hotChannelThreshold < 0 or hotChannelThreshold > 1000:
            # NOTE: We also mention the hot channel limits here
            error.setdefault("hotChannelThreshold", []).append("Hot channel threshold {hotChannelThreshold} is outside the possible range of 0-1000!".format(hotChannelThreshold = hotChannelThreshold))
        inputProcessingOptions["hotChannelThreshold"] = hotChannelThreshold

    # Handle an unexpected exception
    except Exception as e:
        error.setdefault("generalError", []).append("Unknown exception! " + str(e))

    return (error, minTime, maxTime, runDir, subsystemName, histGroup, histName, inputProcessingOptions)

[docs]def validateRunPage(runDir, subsystemName, requestedFileType, runs):
    """ Validates requests to the various run page types (handling individual run pages and root files).

    The return tuple contains the validated values. The error value should always be checked first
    before using the other return values (they will be safe, but may not be meaningful).

    Note:
        For the error format in ``error``, see the :doc:`web app README </webAppReadme>`.

    Note:
        The listed args (after the first four) are provided through the flask ``request.args`` dictionary.

    Args:
        runDir (str): String containing the run number. For an example run 123456, it should be
            formatted as ``Run123456``
        subsystemName (str): The current subsystem in the form of a three letter, all capital name (ex. ``EMC``).
        requestedFileType (str): Either "runPage", which corresponds to a standard run page or "rootFiles", which
            corresponds to the page displaying the available root files.
        runs (BTree): Dict-like object which stores all run, subsystem, and hist information. Keys are the
            in the ``runDir`` format ("Run123456"), while the values are ``runContainer`` objects. This should
            be retrieved from the database.
        jsRoot (bool): True if the response should use jsRoot instead of images.
        ajaxRequest (bool): True if the response should be via AJAX.
        requestedHistGroup (str): Name of the requested hist group. It is fine for it to be an empty string.
        requestedHist (str): Name of the requested histogram. It is fine for it to be an empty string.
    Returns:
        tuple: (error, run, subsystem, requestedFileType, jsRoot, ajaxRequest, requestedHistGroup, requestedHist, timeSliceKey, timeSlice)
            where error (dict) contains any possible errors, run (runContainer) corresponds to the current
            run, subsystem (subsystemContainer) corresponds to the current subsystem, requestedFileType (str)
            is the type of run page ("runPage" or "rootFiles"), jsRoot (bool) is True if the response should
            use jsRoot, ajaxRequest (bool) is true if the response should be as AJAX, requestedHistGroup (str)
            is the name of the requested hist group, requestedHist (str) is the name of the requested histogram,
            timeSliceKey (str) is the time slice key, and timeSlice (timeSliceContainer) is the time slice object.
            For more on the last two arguments, see ``retrieveAndValidateTimeSlice(...)``.
    """
    error = {}
    try:
        # Set and validate run
        if runDir in runs.keys():
            run = runs[runDir]
        else:
            error.setdefault("Run Dir", []).append("{runDir} is not a valid run dir! Please select a different run!".format(runDir = runDir))
            # Invalidate and we cannot continue
            return (error, None, None, None, None, None, None, None, None, None)

        # Set subsystem and validate
        if subsystemName in run.subsystems.keys():
            subsystem = runs[runDir].subsystems[subsystemName]
        else:
            error.setdefault("Subsystem", []).append("{subsystemName} is not a valid subsystem in {prettyName}!".format(subsystemName = subsystemName, prettyName = run.prettyName))
            # Invalidate and we cannot continue
            return (error, None, None, None, None, None, None, None, None, None)

        # Validate requested file type
        if requestedFileType not in ["runPage", "rootFiles"]:
            error.setdefault("Request Error", []).append("Requested: {requestedFileType}. Must request either runPage or rootFiles!".format(requestedFileType = requestedFileType))

        # Determine request parameters
        jsRoot = convertRequestToPythonBool("jsRoot", request.args)
        ajaxRequest = convertRequestToPythonBool("ajaxRequest", request.args)
        requestedHistGroup = convertRequestToStringWhichMayBeEmpty("histGroup", request.args)
        requestedHist = convertRequestToStringWhichMayBeEmpty("histName", request.args)

        # Retrieve time slice key and time slice object
        (timeSliceKey, timeSlice) = retrieveAndValidateTimeSlice(subsystem, error)
    except KeyError as e:
        # Format is:
        # errors = {'hello2': ['world', 'world2'], 'hello': ['world', 'world2']}
        # See: https://stackoverflow.com/a/2052206
        error.setdefault("keyError", []).append("Key error in " + e.args[0])
    except Exception as e:
        error.setdefault("generalError", []).append("Unknown exception! " + str(e))

    if error == {}:
        return (error, run, subsystem, requestedFileType, jsRoot, ajaxRequest, requestedHistGroup, requestedHist, timeSliceKey, timeSlice)
    else:
        return (error, None, None, None, None, None, None, None, None, None)

[docs]def validateTrending(request):
    """ Validate requests to the trending page.

    The return tuple contains the validated values. The error value should always be checked first
    before using the other return values (they will be safe, but may not be meaningful).

    Note:
        For the error format in ``error``, see the :doc:`web app README </webAppReadme>`.

    Note:
        Function args are provided through the flask ``request.args`` dictionary.

    Args:
        request (Flask.request): The request object from Flask.
        jsRoot (bool): True if the response should use jsRoot instead of images.
        ajaxRequest (bool): True if the response should be via AJAX.
        subsystemName (str): Name of the requested subsystem. It is fine for it to be an empty string.
            Provided via the ``histGroup`` field since it is treated identically, allowing us to avoid
            the need to define another field for this one case.
        histName (str): Name of the requested histogram. It is fine for it to be an empty string.
    Returns:
        tuple: (error, subsystemName, requestedHist, jsRoot, ajaxRequest), where where error (dict) contains
            any possible errors, subsystemName (str) corresponds to the current subsystem, subsystemName (str)
            is the requested subsystem in the form of a three letter, all capital name (ex. ``EMC``).
            jsRoot (bool) is True if the response should use jsRoot, ajaxRequest (bool) is true if the response
            should be as AJAX.
    """
    error = {}
    try:
        # Determine request parameters
        jsRoot = convertRequestToPythonBool("jsRoot", request.args)
        ajaxRequest = convertRequestToPythonBool("ajaxRequest", request.args)
        # Reuse the hist group infrastructure for retrieving the subsystem
        requestedHistGroup = convertRequestToStringWhichMayBeEmpty("histGroup", request.args)
        subsystemName = requestedHistGroup
        requestedHist = convertRequestToStringWhichMayBeEmpty("histName", request.args)

        # subsystemName could be None, so we first must check if it exists
        if subsystemName and subsystemName not in serverParameters["subsystemList"] + ["TDG"]:
            error.setdefault("Subsystem", []).append("{} is not a valid subsystem!".format(subsystemName))
    except KeyError as e:
        error.setdefault("keyError", []).append("Key error in " + e.args[0])
    except Exception as e:
        error.setdefault("generalError", []).append("Unknown exception! " + str(e))

    if error == {}:
        return (error, subsystemName, requestedHist, jsRoot, ajaxRequest)
    else:
        return (error, None, None, None, None)

## Validate individual values

[docs]def convertRequestToPythonBool(paramName, source):
    """ Converts a requested parameter to a python bool.

    The validation is particularly useful for jsRoot and ajaxRequest. Note that this function
    is fairly similar to `convertRequestToStringWhichMayBeEmpty`.

    Args:
        paramName (str): Name of the parameter in which we are interested in.
        source (dict): Source of the information. Usually request.args or request.form.
    Returns:
        bool: True if the retrieved value was True.
    """
    paramValue = source.get(paramName, False, type=str)
    #logger.info("{paramName}: {paramValue}".format(paramName = paramName, paramValue = paramValue))
    if paramValue is not False:
        paramValue = json.loads(paramValue)
    logger.info("{paramName}: {paramValue}".format(paramName = paramName, paramValue = paramValue))

    return paramValue

[docs]def convertRequestToStringWhichMayBeEmpty(paramName, source):
    """ Handle strings which may be empty or contain "None".

    This validation is particularly useful for validating hist names and hist groups
    request strings to ensure that they are valid strings before doing further validation.
    Empty strings should be treated as ``None``. The ``None`` strings are from the
    timeSlicesValues div on the runPage. Note that this function is fairly similar
    to `convertRequestToPythonBool`.

    Args:
        paramName (str): Name of the parameter in which we are interested in.
        source (dict): Source of the information. Usually request.args or request.form.
    Returns:
        str or None: Validated string or ``None`` if the string is empty or "None".
    """
    paramValue = source.get(paramName, None, type=str)
    logger.info("{paramName}: {paramValue}".format(paramName = paramName, paramValue = paramValue))
    # If we see "None", then we want to be certain that it is ``None``!
    # Otherwise, we will interpret an empty string as a None value.
    if paramValue == "" or paramValue == "None":
        paramValue = None

    # To get an empty string, we need to explicitly select one with this contrived value.
    # We need to do this because it is possible for the group selection pattern to be an empty string,
    # but that is not equal to no hist being selected in a request.
    if paramValue == "nonSubsystemEmptyString":
        paramValue = ""
    logger.info("{paramName}: {paramValue}".format(paramName = paramName, paramValue = paramValue))

    return paramValue

[docs]def convertRequestToPositiveInteger(paramName, source):
    """ Converts a requested parameter into a positive integer.

    This function is somewhat similar to the other conversion and validation functions,
    although it is a bit simpler.

    Args:
        paramName (str): Name of the parameter in which we are interested in.
        source (dict): Source of the information. Usually request.args or request.form.
    Returns:
        int: The requested int or 0 if it was somehow invalid.
    """
    paramValue = source.get(paramName, default = 0, type = int)
    if paramValue < 0:
        paramValue = 0

    logger.info("{}: {}".format(paramName, paramValue))
    return paramValue

[docs]def validateHistGroupAndHistName(histGroup, histName, subsystem, run, error):
    """ Check that the given hist group or hist name exists in the subsystem.

    Look for the requested hist group or hist name within a given subsystem. It requires that the
    hist group and hist name have already been validated to ensure that they are valid strings
    or ``None``. Note that it could be perfectly valid for both to be ``None``!

    Note:
        As of Sept 2016, this check is not performed on the run page because it seems unnecessary
        to check every single value and there could be a substantial performance cost. This
        should be revisited in the future if it becomes a problem.

    Note:
        For the error format in ``error``, see the :doc:`web app README </webAppReadme>`.

    Args:
        histGroup (str or None): Requested hist group.
        histName (str or None): Requested hist name.
        subsystem (subsystemContainer): Subsystem which should contain the hist group and hist name.
        run (runContainer): Run for which the hist group and hist name should exist.
        error (dict): Contains any possible errors following the defined error format. We will append
            any new errors to it.
    Returns:
        None: It will append an error to the error dict if there is a problem with the given hist
            group or hist nine. The error dict should be checked by the returning function
            to determine the result and decide how to proceed.
    """
    # The request with either be for a hist group or a hist name, so we can just use an if statement here.
    if histGroup:
        #logger.info("histGroup: {histGroup}".format(histGroup = histGroup))
        #if histGroup in [group.selectionPattern for group in subsystem.histGroups]:
        foundHistGroup = False
        for i, group in enumerate(subsystem.histGroups):
            #logger.debug("group.selectionPattern: {selectionPattern}".format(selectionPattern = group.selectionPattern))
            if histGroup == group.selectionPattern:
                foundHistGroup = True
                if histName and histName not in subsystem.histGroups[i].histList:
                    error.setdefault("histName", []).append("histName {histName} is not available in histGroup {histGroup} in {prettyName}".format(histName = histName, histGroup = histGroup, prettyName = run.prettyName))

                # Found group - we don't need to look at any more groups
                break

        if not foundHistGroup:
            error.setdefault("histGroup", []).append("histGroup {histGroup} is not available in {prettyName}".format(histGroup = histGroup, prettyName = run.prettyName))
    else:
        if histName and histName not in subsystem.hists.keys():
            error.setdefault("histName", []).append("histName {histName} is not available in {prettyName}".format(histName = histName, prettyName = run.prettyName))

[docs]def retrieveAndValidateTimeSlice(subsystem, error):
    """ Retrieves the time slice key and then returns the corresponding time slice (it is exists).

    This function safely retrieves a ``timeSliceContainer``. In the case of a valid time slice
    key, the corresponding object will be retrieved. However, in the case of "fullProcessing",
    the object will be ``None`` so we can immediately return the full object. Errors will be
    appended under the ``timeSliceKey`` key.

    Note:
        For the error format in ``error``, see the :doc:`web app README </webAppReadme>`.

    Args:
        subsystem (subsystemContainer): Subsystem for which the time slices request was made.
        error (dict): Contains any possible errors following the defined error format. We will append
            any new errors to it.
    Returns:
        tuple: (timeSliceKey, timeSlice) where timeSliceKey (str) is the key under which the time slice
            is stored or "fullProcessing" (which indicates full processing), and timeSlice (timeSliceContainer)
            is the corresponding time slice retrieved from the subsystem, or None if for any reason
            it could not be retrieved.
    """
    # Retrieve the key and validate.
    timeSliceKey = request.args.get("timeSliceKey", "", type=str)
    logger.info("timeSliceKey: {timeSliceKey}".format(timeSliceKey = timeSliceKey))
    if timeSliceKey == "" or timeSliceKey == "None":
        timeSlice = None
        timeSliceKey = None
    else:
        timeSliceKey = json.loads(timeSliceKey)

    # Select the time slice if the key is valid
    if timeSliceKey:
        #logger.debug("timeSlices: {timeSlices}, timeSliceKey: {timeSliceKey}".format(timeSlices = subsystem.timeSlices, timeSliceKey = timeSliceKey))
        # Filter out "fullProcessing"
        if timeSliceKey == "fullProcessing":
            timeSlice = None
        elif timeSliceKey in subsystem.timeSlices.keys():
            timeSlice = subsystem.timeSlices[timeSliceKey]
        else:
            error.setdefault("timeSliceKey", []).append("{timeSliceKey} is not a valid time slice key! Valid time slices include {timeSlices}. Please select a different time slice!".format(timeSliceKey = timeSliceKey, timeSlices = subsystem.timeSlices))
            timeSlice = None
    else:
        # Should be redundant, but left for completeness
        timeSlice = None

    return (timeSliceKey, timeSlice)

[docs]def extractValueFromNextOrRequest(paramName, source):
    """ Extract the selected parameter from the next parameter or directly from the request.

    First attempt to extract the named parameter from the next parameter in the args of the
    request. If it isn't available, then attempt to extract it directly from the request args
    parameters. This is particularly useful for logging the user back in the case of a default
    username.

    Args:
        paramName (str): Name of the parameter to extract.
        source (dict): Source of the information. Usually request.args or request.form.
    Returns:
        str: Value of the extracted parameter.
    """
    # Attempt to extract from the next parameter if it exists
    paramValue = ""
    if "next" in source:
        # Check the next parameter
        nextParam = source.get("next", "", type=str)
        #logger.debug("nextParam: {nextParam}".format(nextParam = nextParam))
        if nextParam != "":
            nextParam = urlparse.urlparse(nextParam)
            #logger.debug("nextParam: {nextParam}".format(nextParam = nextParam))
            # Get the actual parameters

            params = urlparse.parse_qs(nextParam.query)
            #logger.debug("params: {params}".format(params = params))
            try:
                # Has a one entry list
                paramValue = params.get(paramName, "")[0]
            except (KeyError, IndexError) as e:
                logger.warning("Error in getting {paramName}: {args}".format(paramName = paramName, args = e.args[0]))
                paramValue = ""

    # Just try to extract directly if it isn't in the next parameter
    if paramValue == "":
        paramValue = source.get(paramName, "", type=str)
    logger.info("{paramName}: {paramValue}".format(paramName = paramName, paramValue = paramValue))

    return paramValue
Source code for overwatch.webApp.validation

Navigation

Related Topics