Source code for groundhog.general.validation

#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = 'Bruno Stuyts'

import re
import numpy as np
from functools import wraps, partial
import inspect
from copy import deepcopy
from collections import OrderedDict
import warnings


def validate_float(var_name, value, min_value=None, max_value=None):
    """
    Validates whether a variable can be used as a floating point number and whether it is
    within specified bounds. If a value equals one of the bounds, the validation passes.

    :param var_name: Name of the variable, only used to build the error messages
    :param value: Value to be validated
    :param min_value: Lower bound (inclusive). The check is skipped when ``None``
    :param max_value: Upper bound (inclusive). The check is skipped when ``None``

    :returns: ``True`` when the validation passes
    :raises TypeError: when ``float(value)`` fails
    :raises ValueError: when the value lies outside the specified bounds
    """
    try:
        number = float(value)
    except Exception as err:
        raise TypeError(
            "%s (%s) is not a floating point number - %s" % (var_name, str(value), str(err))
        ) from err

    # The bounds are checked on the converted number so that anything accepted by float()
    # (e.g. a numeric string) is compared numerically instead of failing on the comparison.
    if min_value is not None and number < min_value:
        raise ValueError(
            "%s (%s) cannot be smaller than %s" % (var_name, str(value), str(min_value)))
    if max_value is not None and number > max_value:
        raise ValueError(
            "%s (%s) cannot be greater than %s" % (var_name, str(value), str(max_value)))
    return True
def validate_integer(var_name, value, min_value=None, max_value=None):
    """
    Validates whether a variable can be used as an integer and whether it is within
    specified bounds. If a value equals one of the bounds, the validation passes.

    A value is accepted when ``int(value)`` succeeds and the converted integer compares
    equal to the original value (so ``3`` and ``3.0`` pass, ``3.5`` and ``"3"`` do not).

    :param var_name: Name of the variable, only used to build the error messages
    :param value: Value to be validated
    :param min_value: Lower bound (inclusive). The check is skipped when ``None``
    :param max_value: Upper bound (inclusive). The check is skipped when ``None``

    :returns: ``True`` when the validation passes
    :raises TypeError: when the value cannot be converted or the conversion is not lossless
    :raises ValueError: when the value lies outside the specified bounds
    """
    try:
        converted = int(value)
        if not converted == value:
            raise TypeError(
                "Value can be converted to integer (%s) but converted integer does not equal %s" % (
                    str(converted), str(value)))
    except Exception as err:
        # Conversion errors and the lossy-conversion error above share one outer message
        raise TypeError(
            "%s (%s) is not an integer number - %s" % (var_name, str(value), str(err))
        ) from err

    if min_value is not None and value < min_value:
        raise ValueError(
            "%s (%s) cannot be smaller than %s" % (var_name, str(value), str(min_value)))
    if max_value is not None and value > max_value:
        raise ValueError(
            "%s (%s) cannot be greater than %s" % (var_name, str(value), str(max_value)))
    return True
def validate_boolean(var_name, value):
    """
    Validates whether a variable can be used as a boolean.

    The value is accepted when its boolean conversion compares equal to the value itself,
    which holds for ``True``/``False`` as well as for ``1``/``0``.

    :param var_name: Name of the variable, only used to build the error message
    :param value: Value to be validated

    :returns: ``True`` when the validation passes
    :raises TypeError: when the conversion fails or does not compare equal to the value
    """
    try:
        as_boolean = bool(value)
        if not (as_boolean == value):
            raise TypeError(
                "Value can be converted to boolean (%s) but converted boolean does not equal %s" % (
                    str(as_boolean), str(value)))
    except Exception as err:
        # Any failure (conversion, comparison or mismatch) is reported with the same prefix
        raise TypeError("%s (%s) is not a boolean - %s" % (var_name, str(value), str(err)))
    return True
def validate_string(var_name, value, options=None, regex=None):
    """
    Validates whether a variable can be used as a string. The routine also allows checking
    whether the string is in a collection of allowable strings or whether it matches a
    specific regex pattern.

    :param var_name: Name of the variable, only used to build the error messages
    :param value: Value to be validated
    :param options: Collection of allowable strings. The check is skipped when ``None``
    :param regex: Pattern (string or compiled pattern) which needs to match at the start of
        the string (``re.match`` semantics). The check is skipped when ``None``

    :returns: ``True`` when the validation passes
    :raises TypeError: when ``str(value)`` does not compare equal to the value
    :raises ValueError: when the value is not in ``options`` or does not match ``regex``
    """
    try:
        if not str(value) == value:
            raise TypeError(
                "Value can be converted to string (%s) but converted string does not equal %s" % (
                    str(value), str(value)))
    except Exception as err:
        raise TypeError(
            "%s (%s) is not a string - %s" % (var_name, str(value), str(err))) from err

    # Identity comparison with None: ``options != None`` is evaluated element-wise for
    # array-like options, which has no unambiguous truth value.
    if options is not None and value not in options:
        raise ValueError(
            "%s (%s) not included in list of allowable strings (%s)" % (
                var_name, str(value), str(options)))
    if regex is not None and not re.match(regex, value):
        raise ValueError(
            "%s (%s) does not match the required string format (%s)" % (
                var_name, str(value), str(regex)))
    return True
def validate_list(var_name, value, elementtype=None, order=None, unique=None, empty_allowed=None):
    """
    Validates whether a variable is a list or tuple (Numpy arrays are converted to a list
    first). Optionally checks the type of the elements, whether the elements are in
    ascending or descending order, whether they are unique and whether the list is empty.

    :param var_name: Name of the variable, only used to build the error messages
    :param value: Value to be validated
    :param elementtype: Required type of each element: ``"float"``, ``"string"``, ``"int"``
        or ``"boolean"``. The check is skipped when ``None``
    :param order: ``'ascending'``, ``'descending'`` or ``None`` (no check). Equal
        neighbouring values are accepted, NaN values are not
    :param unique: When ``True``, duplicate elements are rejected. ``None`` and ``False``
        disable the check
    :param empty_allowed: When ``False``, an empty list is rejected

    :returns: ``True`` when the validation passes
    :raises TypeError: when the value is not a list, tuple or Numpy array
    :raises ValueError: when one of the optional checks fails or when an invalid value is
        given for ``order`` or ``unique``
    """
    try:
        if isinstance(value, np.ndarray):
            value = list(value)
        if not (list(value) == value or tuple(value) == value):
            raise TypeError(
                "Value can be converted to list (%s) but converted list does not equal %s" % (
                    str(value), str(value)))
    except Exception as err:
        raise TypeError(
            "%s (%s) is not a list or tuple - %s" % (var_name, str(value), str(err))) from err

    if elementtype is not None:
        try:
            for el in value:
                if elementtype == "float":
                    validate_float(var_name, el)
                elif elementtype == "string":
                    validate_string(var_name, el)
                elif elementtype == "int":
                    validate_integer(var_name, el)
                elif elementtype == "boolean":
                    validate_boolean(var_name, el)
                else:
                    raise ValueError("Unspecified elementtype")
        except Exception as err:
            raise ValueError(
                "Invalid element type for %s, %s required" % (str(el), elementtype)) from err

    # Order checks work on a list copy: sorted() always returns a list, so comparing its
    # result with a tuple would never be equal.
    items = list(value)
    if order == 'ascending' or order == 'descending':
        try:
            if order == 'ascending':
                is_ordered = sorted(items) == items
            else:
                is_ordered = sorted(items) == list(reversed(items))
            # NaN is the only value which does not equal itself. Sorting a list with NaN
            # is meaningless, so such a list is never reported as ordered.
            has_nan = any(el != el for el in items)
        except Exception as err:
            # e.g. elements which cannot be compared with each other
            raise ValueError("%s" % str(err)) from err
        if has_nan or not is_ordered:
            raise ValueError("List %s is not %s" % (str(value), order))
    elif order is not None:
        raise ValueError("Incorrect string for list order")

    if unique is not None:
        if unique not in (True, False):
            raise ValueError("Validation parameter unique must be boolean")
        if unique and len(items) > len(set(items)):
            raise ValueError("%s (%s) contains non-unique elements" % (var_name, str(value)))

    # Equality (not truthiness) is used on purpose: None means "no check"
    if empty_allowed == False and len(items) == 0:  # noqa: E712
        raise ValueError("Empty lists are not allowed")
    return True
def map_args(method, var, *args, **kwargs):
    """
    Constructs a data structure with all parameters, their values and the validation
    parameters which need to be used during validation.

    :param method: The function for which validation will be applied
    :param var: The validation data structure, entered as argument of the function decorator.
        It is indexed by parameter name and each entry is indexed by validation setting
    :param args: function arguments
    :param kwargs: function keyword arguments. Keys ending in ``__min`` or ``__max``
        override ``min_value`` / ``max_value`` of the corresponding parameter

    :returns: Dictionary ``var_validation`` which is a deep copy of the validation data
        structure (``var`` itself is not modified) with the runtime value of each parameter
        stored under the ``'value'`` key
    :raises ValueError: when the mapping fails, e.g. when more positional arguments are
        given than the function accepts or when an override targets an unknown parameter
    """
    try:
        parameters = list(inspect.signature(method).parameters.values())

        # Construct a data structure with all function arguments, defaults are used
        # Remove self for validators applied to class methods
        parameter_names = [
            parameter.name for parameter in parameters
            if parameter.kind == parameter.POSITIONAL_OR_KEYWORD and parameter.name != 'self']
        all_vars = OrderedDict.fromkeys(parameter_names)

        # For class methods the instance is the first positional argument. It is dropped by
        # position; all other arguments keep their position whatever their type, so that
        # e.g. None or a Numpy integer does not shift later arguments to the wrong name.
        if parameters and parameters[0].name == 'self' and args and 'self' not in kwargs:
            args = args[1:]

        for parameter in parameters:
            if parameter.name != 'self' and parameter.default is not inspect.Parameter.empty:
                all_vars[parameter.name] = parameter.default

        # Bind the positional arguments in signature order. An IndexError (too many
        # positional arguments) is reported through the ValueError below.
        positional_targets = list(all_vars.keys())
        for i, arg in enumerate(args):
            all_vars[positional_targets[i]] = arg

        var_validation = deepcopy(var)

        for key, value in kwargs.items():
            # Modification of min and max ranges with override
            # The override only affects the copy, not the validation data structure itself
            if key.endswith('__min'):
                var_validation[key.replace('__min', '')]['min_value'] = value
            elif key.endswith('__max'):
                var_validation[key.replace('__max', '')]['max_value'] = value
            # Bind the actual function arguments, this is required because the defaults
            # are otherwise used
            else:
                all_vars[key] = value

        # Add the value used at runtime to the validation data structure. Names without a
        # usable validation entry (e.g. raises_errors and validate which are used elsewhere
        # and not in the validation routine) are skipped on purpose.
        for key, value in all_vars.items():
            try:
                var_validation[key]['value'] = value
            except (KeyError, TypeError):
                continue

        return var_validation
    except Exception as err:
        raise ValueError(
            "Error during mapping of validation parameters to function parameters - %s" % str(err)
        ) from err
class Validator(object):
    """
    Decorator class which validates the arguments of the decorated function before
    calling it.

    The Validator has the following features

        - Automatic handling of validation errors
        - Automatic handling of function output upon errors
        - Possibility to override the default validation dictionary with custom validation

    The behaviour can be steered per call with the following keyword arguments. They are
    read from ``kwargs`` and also passed on unchanged to the decorated function:

        - ``validate``: validation is skipped when this is given and falsy
        - ``fail_silently``: when truthy (default ``True``) errors lead to the error output
          being returned, otherwise the error is re-raised
        - ``customvalidation``: validation data structure replacing the default one
        - ``customerroroutput``: output upon errors replacing the default one
    """

    def __init__(self, validationspec, outputonerrorspec):
        """
        :param validationspec: Default validation data structure, indexed by parameter name
        :param outputonerrorspec: Default value returned when an error is handled silently
        """
        self.validationspec = validationspec
        self.outputonerror = outputonerrorspec

    def __call__(self, fn):
        """
        Wraps ``fn`` with validation and error handling.

        :param fn: Function to be decorated
        :returns: The decorated function
        """
        @wraps(fn)
        def decorated(*args, **kwargs):
            validate = kwargs.get('validate')
            fail_silently = kwargs.get('fail_silently', True)
            validation_params = kwargs.get('customvalidation', self.validationspec)
            output_for_errors = kwargs.get('customerroroutput', self.outputonerror)

            if validate or validate is None:
                # Execute validation
                try:
                    var_validation = map_args(fn, validation_params, *args, **kwargs)
                    for name, spec in var_validation.items():
                        self._validate_parameter(name, spec)
                except Exception as err:
                    warnings.warn(str(err))
                    if fail_silently:
                        return output_for_errors
                    raise

            try:
                return fn(*args, **kwargs)
            except Exception:
                # Only ordinary errors are handled here, KeyboardInterrupt and SystemExit
                # always propagate.
                if fail_silently:
                    return output_for_errors
                raise

        return decorated

    @staticmethod
    def _validate_parameter(name, spec):
        """
        Runs the validation routine selected by ``spec['type']`` on ``spec['value']``.
        Entries with another type than float, int, string, bool or list are not validated.
        """
        kind = spec['type']
        if kind == 'float':
            validate_float(name, spec['value'], spec['min_value'], spec['max_value'])
        elif kind == 'int':
            validate_integer(name, spec['value'], spec['min_value'], spec['max_value'])
        elif kind == 'string':
            validate_string(name, spec['value'], options=spec['options'], regex=spec['regex'])
        elif kind == 'bool':
            validate_boolean(name, spec['value'])
        elif kind == 'list':
            validate_list(
                name, spec['value'], spec['elementtype'], spec['order'],
                spec['unique'], spec['empty_allowed'])
def check_layer_overlap(df, raise_error=True, z_from_key=None, z_to_key=None):
    """
    Checks a layering dataframe for gaps and overlaps between consecutive layers.

    The index of the dataframe is reset in place before checking. Each layer is then
    compared with the previous one: a gap exists when the layer starts below the end of the
    previous layer, an overlap exists when it starts above it.

    :param df: Dataframe with keys 'z from [m]' and 'z to [m]'. Other keys can be used but then the arguments `z_from_key` and `z_to_key` need to be provided.
    :param raise_error: Boolean determining whether the first gap or overlap raises an error (default) or whether a warning is issued for each gap or overlap
    :param z_from_key: Key for start depth of the layer
    :param z_to_key: Key for end depth of the layer
    :return: Nothing is returned
    :raises ValueError: when ``raise_error`` is truthy and a gap or overlap is found
    """
    # Reset the index first so that row labels are consecutive integers starting at 0
    df.reset_index(drop=True, inplace=True)

    # Set keys for top and bottom depths of layers
    top_key = "z from [m]" if z_from_key is None else z_from_key
    bottom_key = "z to [m]" if z_to_key is None else z_to_key

    for current in range(1, len(df)):
        previous = current - 1
        layer_top = df.loc[current, top_key]
        previous_bottom = df.loc[previous, bottom_key]

        if layer_top > previous_bottom:
            message = "A gap exists between layer %i and %i" % (previous, current)
        elif layer_top < previous_bottom:
            message = "Overlap exists between layer %i and %i" % (previous, current)
        else:
            continue

        if raise_error:
            raise ValueError(message)
        warnings.warn(message)