# -*- coding: utf-8 -*-
"""
Tools for applying validation analysis to pre-preocess extracts of model and
instrument data.
"""
# ----------------------------------------------------------------------------
# IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
# Non-Standard Python Dependencies
import numpy as np
# Local Module Dependencies
from ocmw.core.fileTools import loadmat
from ocmw.core.timeFuncs import dateNumFromDateStr, dateStrFromDateNum
from ocmw.core.statsTools import stats_metrics
from ocmw.core.statsTools import display_metrics
from ocmw.core.graphics import flow_class_diagram
from ocmw.core.graphics import plot_validation_stats
from ocmw.core.graphics import speed_bin_histograms
from ocmw.core.graphics import directional_spread_plot
# Other Dependencies
# ----------------------------------------------------------------------------
# GLOBAL VARIABLES
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# CLASS DEFINITIONS
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# FUNCTION DEFINITIONS
# ----------------------------------------------------------------------------
[docs]
def validateFlowParameter(modFile,obsFile,paramList,doFlowChar=True,varStr='Velocity',velLims=[0.0,4.5]):
"""
Validation of model velocity data against in situ ADCP data
Parameters
----------
modFile : str
Full path to model data file.
obsFile : str
Full path to in situ measurements data file.
paramList : list,str
List of parameters to be processed.
doFlowChar : bool
Flag to toggle application of flow characterisation. Default=True
varStr : str
Variable definition used in plot titles.
velLims : list,float
Lower and upper limits for velocity correlation plot axes.
Returns
-------
metrics : dict
Dictionary of validation metrics for all, flood and ebb tide states.
figHnds : dict
Dictionary of figure handles for all graphics generated.
metadata : dict
Dictionary with information describing data being compared
"""
# Initialise output
metrics = {}
figHnds = {}
# Load pre-processed model data
data_mod = loadmat(modFile)
# Generate flow characterization plot
if doFlowChar:
fh_mod = flow_class_diagram(data_mod)
fh_mod_spdh = speed_bin_histograms(data_mod,binwidth=0.1,maxfrac=0.08)
fh_mod_dspr = directional_spread_plot(data_mod)
figHnds['mod_flowchar'] = fh_mod
figHnds['mod_speedhist'] = fh_mod_spdh
figHnds['mod_dirnhist'] = fh_mod_dspr
#Load pre-processed observation data
data_obs = loadmat(obsFile)
# Generate flow characterization plot
if doFlowChar:
fh_obs = flow_class_diagram(data_obs)
fh_obs_spdh = speed_bin_histograms(data_obs,binwidth=0.1,maxfrac=0.08)
fh_obs_dspr = directional_spread_plot(data_obs)
figHnds['obs_flowchar'] = fh_obs
figHnds['obs_speedhist'] = fh_obs_spdh
figHnds['obs_dirnhist'] = fh_obs_dspr
# Extract metadata for plotting and metric calculations
metadata = {}
metadata['obs_src'] = data_obs['dataSrc']
metadata['obs_deploy'] = data_obs['dataSet']
metadata['mod_src'] = data_mod['dataSrc']
metadata['dataLoc'] = data_obs['dataLoc']
metadata['crs'] = 'WGS84 '+data_obs['crs']+' ('+data_mod['crs']+')'
metadata['hab'] = data_mod['hab']
metadata['zLoc'] = data_mod['zLoc']
# Loop over parameters in list
for paramStr in paramList:
# Load parameter time series data
if paramStr in ['magnitude','magn']:
metrics[paramStr] = {}
figHnds[paramStr] = {}
fieldStr = 'magn'
metricStr = 'magnitude'
displayStr = varStr+' Magnitude'
unitStr = ' ms'+r'$^{-1}$'
min_val = velLims[0]
max_val = velLims[1]
metrics[paramStr]['unitStr'] = unitStr
elif paramStr in ['direction','dirn','thta']:
metrics[paramStr] = {}
figHnds[paramStr] = {}
fieldStr = 'thta'
metricStr = 'direction'
displayStr = varStr+' Direction'
unitStr = r'$^{\circ}$'
min_val = -180.0
max_val = 180.0
metrics[paramStr]['unitStr'] = unitStr
if fieldStr in data_mod.keys():
# Generate full parameter metrics
#--------------------------------------
# Load parameter timeseries data
t_mod = data_mod['times'].copy()+dateNumFromDateStr(data_mod['epoch'].replace('/','-'))
v_mod = data_mod[fieldStr].copy()
t_obs = data_obs['times'].copy()+dateNumFromDateStr(data_obs['epoch'].replace('/','-'))
v_obs = data_obs[fieldStr].copy()
# Interpolate obs onto model timestamps
obs = np.interp(t_mod,t_obs,v_obs,left=np.nan,right=np.nan)
# Exclude missing data
good = np.where(~np.isnan(obs))[0]
mod = v_mod[good]
obs = obs[good]
metadata['StartDate'] = dateStrFromDateNum(np.floor(t_mod[good[0]]))
metadata['NumDays'] = np.floor(t_mod[good[-1]]-t_mod[good[0]])+1
# Calculate validation statistics
metrics_all = stats_metrics(mod,obs,metricStr)
# Show results
display_metrics(metrics_all,metricStr)
metadata['tideState'] = 'All phases'
fig_metrics_all = plot_validation_stats(obs,mod,metrics_all,displayStr,unitStr,min_val,max_val,metadata)
del mod, obs, good
# Generate flood tide parameter metrics
#--------------------------------------
# Select flood only data
ebbind = data_mod['ebb_indices']
t_mod = data_mod['times'].copy().copy()+dateNumFromDateStr(data_mod['epoch'].replace('/','-'))
t_mod[ebbind] = np.nan
v_mod = data_mod[fieldStr].copy()
v_mod[ebbind] = np.nan
del ebbind
ebbind = data_obs['ebb_indices']
t_obs = data_obs['times'].copy()+dateNumFromDateStr(data_obs['epoch'].replace('/','-'))
t_obs[ebbind] = np.nan
v_obs = data_obs[fieldStr].copy()
v_obs[ebbind] = np.nan
del ebbind
# Interpolate obs onto model timestamps
obs = np.interp(t_mod,t_obs,v_obs,left=np.nan,right=np.nan)
# Exclude missing data
good = np.where((~np.isnan(v_mod)) & (~np.isnan(obs)))[0]
mod = v_mod[good]
obs = obs[good]
# Calculate validation statistics
metrics_fld = stats_metrics(mod,obs,metricStr)
# Show results
display_metrics(metrics_fld,metricStr)
metadata['tideState'] = 'Flood only'
fig_metrics_fld = plot_validation_stats(obs,mod,metrics_fld,displayStr,unitStr,min_val,max_val,metadata)
del mod, obs, good
# Generate ebb tide parameter metrics
#--------------------------------------
# Select ebb only data
fldind = data_mod['flood_indices']
t_mod = data_mod['times'].copy()+dateNumFromDateStr(data_mod['epoch'].replace('/','-'))
t_mod[fldind] = np.nan
v_mod = data_mod[fieldStr].copy()
v_mod[fldind] = np.nan
del fldind
fldind = data_obs['flood_indices']
t_obs = data_obs['times'].copy()+dateNumFromDateStr(data_obs['epoch'].replace('/','-'))
t_obs[fldind] = np.nan
v_obs = data_obs[fieldStr].copy()
v_obs[fldind] = np.nan
del fldind
# Interpolate obs onto model timestamps
obs = np.interp(t_mod,t_obs,v_obs,left=np.nan,right=np.nan)
# Exclude missing data
good = np.where((~np.isnan(v_mod)) & (~np.isnan(obs)))[0]
mod = v_mod[good]
obs = obs[good]
# Calculate validation statistics
metrics_ebb = stats_metrics(mod,obs,metricStr)
# Show results
display_metrics(metrics_ebb,metricStr)
metadata['tideState'] = 'Ebb only'
fig_metrics_ebb = plot_validation_stats(obs,mod,metrics_ebb,displayStr,unitStr,min_val,max_val,metadata)
del mod, obs, good
# Store results
metrics[paramStr]['all'] = metrics_all
metrics[paramStr]['flood'] = metrics_fld
metrics[paramStr]['ebb'] = metrics_ebb
figHnds[paramStr]['valid_all'] = fig_metrics_all
figHnds[paramStr]['valid_flood'] = fig_metrics_fld
figHnds[paramStr]['valid_ebb'] = fig_metrics_ebb
# Clean up variable space
del t_obs, v_obs, t_mod, v_mod
del metrics_all, metrics_fld, metrics_ebb
else:
print(' Warning: Parameter '+paramStr+' not available\n No validation metrics calculated.')
return metrics, figHnds, metadata
[docs]
def valid_results_to_file(info, unitStr, metrics, outfile):
# Need to loop over parameters and tide states
fout = open(outfile,'w')
fout.write('Data Description\n')
fout.write('Observation Data Source: '+info['obs_src']+'\n')
fout.write('Deployment ID: '+info['obs_deploy']+'\n')
fout.write('Model Data Source: '+info['mod_src']+'\n')
fout.write('Data Geo Location: ['+str(info['dataLoc'][0])+' , '+str(info['dataLoc'][1])+']\n')
fout.write('Data CRS: '+info['crs']+'\n')
if info['hab']:
fout.write('Data Height Above Bed: '+str(info['zLoc'])+' m'+'\n')
else:
fout.write('Depth Below Surface: '+str(info['zLoc'])+' m'+'\n')
fout.write('Coverage: '+str(info['NumDays'])+' days from '+info['StartDate']+'\n')
fout.write('Tidal State: '+info['tideState']+'\n')
fout.write('\n')
fout.write('Validation Statistics'+'\n')
fout.write('Correlation = '+"{0:+.3f}".format(metrics['R']),+'\n')
fout.write('Goodness-of-fit = '+"{0:+.3f}".format(metrics['R2']),+'\n')
fout.write('Mean Bias = '+"{0:+.3f}".format(metrics['MB'])+unitStr+'\n')
fout.write('Normalised MB = '+"{0:+.2f}".format(metrics['NMB'])+'%'+'\n')
fout.write('Mean Absolute Error = '+"{0:+.3f}".format(metrics['MAE'])+unitStr+'\n')
fout.write('Normalised MAE = '+"{0:+.2f}".format(metrics['NMAE'])+'%'+'\n')
fout.write('RMS Error = '+"{0:+.3f}".format(metrics['RMSE'])+unitStr+'\n')
fout.write('Normalised RMSE = '+"{0:+.2f}".format(metrics['NRMSE'])+'%'+'\n')
fout.write('Scatter Index = '+"{0:+.2f}".format(metrics['SI'])+'%'+'\n')
fout.write('Number of Samples = '+str(np.int32(metrics['NSMPL']))+'\n')
fout.close()
return
# ----------------------------------------------------------------------------
# MAIN PROCESS
# ----------------------------------------------------------------------------
#--------------------------------------------------------------------------
# INTERFACE
#--------------------------------------------------------------------------