"""Pre and post-processing functions for CCAM."""
import numpy as np
import datetime
import axiom.drs.utilities as adu
import axiom.utilities as au
def _detect_version(ds):
    """Derive the CCAM version from the dataset's ``history`` attribute.

    The third whitespace-separated token of ``history`` is parsed as a
    ``YYYY-MM-DD`` date and re-formatted as a two-digit year followed by a
    two-digit month.

    Args:
        ds (xarray.Dataset): Dataset carrying a ``history`` attribute.

    Returns:
        str : Version in ``yymm`` form.
    """
    date_token = ds.attrs['history'].split()[2]
    stamp = datetime.datetime.strptime(date_token, '%Y-%m-%d')
    return stamp.strftime('%y%m')
def _set_version_metadata(ds, version):
    """Stamp the CCAM version onto the dataset's global attributes.

    The dataset is modified in place and also returned.

    Args:
        ds (xarray.Dataset): Dataset.
        version (str): Version string.

    Returns:
        xarray.Dataset : The same dataset, with version metadata set.
    """
    model_name = f'CCAM-{version}'
    ds.attrs.update({
        'rcm_model': model_name,
        'rcm_model_cordex': model_name,
        'rcm_model_version': version,
        'rcm_version': version,
        'rcm_version_cordex': version,
    })
    return ds
def preprocess_ccam(ds, **kwargs):
    """Preprocess the data upon loading for CORDEX requirements.

    Args:
        ds (xarray.Dataset): Dataset.
        **kwargs: Keyword options. The following keys are read:

            variable (str): Variable to extract along with bnds. Must be
                used as part of a lambda in open_mfdataset. A falsy value
                keeps every variable.
            kwargs (dict): Nested options. If it contains ``model_id``, the
                text after the last ``-`` is used as the version; otherwise
                the version is detected from the dataset itself.

    Returns:
        xarray.Dataset: Dataset with preprocessing applied.

    Raises:
        KeyError: If ``variable`` or ``kwargs`` is missing from ``kwargs``.
    """
    variable = kwargs['variable']

    # Rename metadata keys if needed. Each key is handled on its own so that
    # a dataset carrying only one of the two is still renamed rather than
    # raising (or being silently skipped).
    for old_key, new_key in (('rlong0', 'rlon'), ('rlat0', 'rlat')):
        if old_key in ds.attrs:
            ds.attrs[new_key] = ds.attrs.pop(old_key)

    # Use the explicitly supplied model id when there is one, otherwise
    # automatically detect the version from the inputs.
    options = kwargs['kwargs']
    if 'model_id' in options:
        version = options['model_id'].split('-')[-1]
    else:
        version = _detect_version(ds)

    ds = _set_version_metadata(ds, version)

    # Extract the lat/lon bounds as well.
    if variable:
        ds = ds[[variable, 'lat_bnds', 'lon_bnds']]

    return ds
def center_times(ds, output_frequency):
    """Center the times in the dataset.

    Every timestamp is shifted forward by half of the interval that follows
    it. The ``time`` entry of ``ds`` is replaced in place and ``ds`` is
    returned.

    Args:
        ds (xarray.Dataset): Data with a ``time`` coordinate of at least two
            steps.
        output_frequency (str): Output frequency. ``'1M'`` selects the
            per-step (variable interval length) handling; any other value is
            treated as evenly spaced.

    Returns:
        xarray.Dataset : Data with times centered.

    Raises:
        IndexError: If ``time`` has fewer than two steps, so no interval can
            be measured.
    """
    times = ds.time.data

    # non-monthly data is simple, just halve the delta
    if output_frequency != '1M':
        dt = times[1] - times[0]
        ds['time'] = ds.time + (dt / 2)
        return ds

    # Otherwise the intervals differ in length, so measure each one.
    dt = times[1:] - times[:-1]

    # Nothing follows the final step, so its interval has to be borrowed.
    # The original heuristic reuses the 7th interval (July, presumably
    # because the series is expected to run January-December so that July
    # matches December's length -- TODO confirm against the callers). Short
    # series fall back to the last measured interval instead of failing.
    final_dt = dt[6] if len(dt) > 6 else dt[-1]
    dt = np.append(dt, final_dt)

    ds['time'] = times + (dt / 2)
    return ds


def postprocess_ccam(ds, **kwargs):
    """For CORDEX processing, there is some minor postprocessing that happens.

    Removes surplus metadata keys, trims the lat/lon bounds variables and,
    when resampling was applied, centers the times.

    Args:
        ds (xarray.Dataset): Data.
        **kwargs: Keyword options. The following keys are read:

            variable (str): Variable currently being processed.
            resampling_applied (bool): Whether resampling was applied; time
                centering only happens when this is True.
            output_frequency (str, optional): Output frequency forwarded to
                ``center_times``.

    Returns:
        xarray.Dataset: Data with postprocessing applied.

    Raises:
        KeyError: If ``variable`` or ``resampling_applied`` is missing.
    """
    logger = au.get_logger(__name__)

    # Strip out the extra metadata keys (Marcus 20220802)
    remove_keys = (
        'ensemble',
        'rcm_institute',
        'rcm_model_cordex',
        'rcm_model',
        'rcm_version_cordex',
    )
    for rk in remove_keys:
        if rk in ds.attrs:
            logger.debug(f'Removing metadata key {rk}')
            ds.attrs.pop(rk)

    # Strip out the extra dimensions from bnds (reduces filesize considerably)
    if 'lat_bnds' in ds.data_vars:
        # Drop surplus coordinates
        ds['lat_bnds'] = au.isolate_coordinate(ds.lat_bnds, 'lat', drop=True)
        ds['lon_bnds'] = au.isolate_coordinate(ds.lon_bnds, 'lon', drop=True)

    # NOTE(review): the instantaneous flag is only logged; centering is
    # decided by resampling_applied alone -- confirm this is intended.
    _is_instantaneous = is_instantaneous(ds, kwargs['variable'])
    _resampling_applied = kwargs['resampling_applied']

    logger.debug(f'is_instantaneous = {_is_instantaneous}')
    logger.debug(f'resampling_applied = {_resampling_applied}')

    # Explicit comparison kept on purpose: a merely truthy value such as a
    # non-empty string does not trigger centering.
    if _resampling_applied == True:  # noqa: E712
        logger.debug('TIME CENTERING TRIGGERED')
        # Forward the real frequency (previously a literal list was passed,
        # which could never equal '1M'). A missing key yields None, which
        # keeps the evenly spaced handling used before.
        ds = center_times(ds, output_frequency=kwargs.get('output_frequency'))

    return ds
def is_instantaneous(ds, variable):
    """Report whether CCAM metadata marks a variable as instantaneous.

    A variable counts as instantaneous when it has no ``cell_methods``
    attribute at all, or when that attribute is exactly ``time: point``.

    Args:
        ds (xarray.Dataset): Data.
        variable (str): Variable currently being processed.

    Returns:
        bool : True if the variable is instantaneous, False otherwise.
    """
    point_flag = 'time: point'
    # A missing attribute falls back to the point flag, i.e. instantaneous.
    return ds[variable].attrs.get('cell_methods', point_flag) == point_flag