Source code for axiom.drs.processing.ccam

"""Pre and post-processing functions for CCAM."""
import numpy as np
import datetime
import axiom.drs.utilities as adu
import axiom.utilities as au


def _detect_version(ds):
    """Derive the CCAM version string from the dataset's history attribute.

    The third whitespace-separated token of ``ds.attrs['history']`` is parsed
    as a ``YYYY-MM-DD`` date and re-formatted as a two-digit year followed by
    a two-digit month.

    Args:
        ds (xarray.Dataset): Dataset carrying a ``history`` global attribute.

    Returns:
        str : Version in ``yymm`` form (e.g. ``'2208'``).
    """
    # Token index 2 is where the date sits in the history string.
    date_token = ds.attrs['history'].split()[2]
    stamp = datetime.datetime.strptime(date_token, '%Y-%m-%d')
    return stamp.strftime('%y%m')


def _set_version_metadata(ds, version):
    """Write the CCAM version into the dataset's global attributes.

    Two attributes receive the model name ``CCAM-<version>`` and three receive
    the bare version string. The dataset is modified in place.

    Args:
        ds (xarray.Dataset): Dataset.
        version (str): Version string.

    Returns:
        xarray.Dataset : The same dataset, with version metadata set.
    """
    model_name = f'CCAM-{version}'
    entries = (
        ('rcm_model', model_name),
        ('rcm_model_cordex', model_name),
        ('rcm_model_version', version),
        ('rcm_version', version),
        ('rcm_version_cordex', version),
    )
    for key, value in entries:
        ds.attrs[key] = value
    return ds


def preprocess_ccam(ds, **kwargs):
    """Preprocess the data upon loading for CORDEX requirements.

    Must be used as part of a lambda in open_mfdataset.

    Args:
        ds (xarray.Dataset): Dataset.
        **kwargs: Keyword options. The following keys are read:
            variable (str): Variable to extract along with bnds. When falsy,
                the dataset is not subset.
            kwargs (dict): Nested options. If it contains ``model_id``, the
                version is the text after the last ``-`` in that value;
                otherwise the version is detected from the dataset history.

    Returns:
        xarray.Dataset: Dataset with preprocessing applied.

    Raises:
        KeyError: If ``variable`` or ``kwargs`` is missing from the keyword
            arguments.
    """
    variable = kwargs['variable']

    # Rename metadata keys if needed. Each key is checked on its own so that
    # a file carrying only one of them neither raises nor is left half-renamed.
    for old_key, new_key in (('rlong0', 'rlon'), ('rlat0', 'rlat')):
        if old_key in ds.attrs:
            ds.attrs[new_key] = ds.attrs.pop(old_key)

    # Automatically detect version from inputs unless a model_id is supplied.
    options = kwargs['kwargs']
    if 'model_id' in options:
        version = options['model_id'].split('-')[-1]
    else:
        version = _detect_version(ds)

    ds = _set_version_metadata(ds, version)

    # Extract the lat/lon bounds as well.
    if variable:
        ds = ds[[variable, 'lat_bnds', 'lon_bnds']]

    return ds


def center_times(ds, output_frequency):
    """Centers the times in the dataset.

    Each timestamp is moved forward by half of the interval it labels.

    Args:
        ds (xarray.Dataset): Data with a ``time`` coordinate.
        output_frequency (str): Output frequency code. ``'1M'`` selects the
            monthly logic (per-step interval lengths); any other value is
            treated as a constant-interval frequency.

    Returns:
        xarray.Dataset : Data with times centered. Returned unchanged when
            there are fewer than two timesteps, because no interval can be
            inferred from a single timestamp.
    """
    times = ds.time.data

    # With fewer than two timestamps there is no delta to halve.
    if np.size(times) < 2:
        return ds

    # non-monthly data is simple, just halve the delta
    if output_frequency != '1M':
        dt = times[1] - times[0]
        ds['time'] = ds.time + (dt / 2)
        return ds

    # Otherwise, months differ in length, so every step needs its own delta.
    # dt[i] is the gap between step i and step i + 1 (one fewer than times).
    dt = times[1:] - times[:-1]

    # The final step has no successor to difference against. Reuse the
    # seventh interval ("july"), which has the same length as December.
    # NOTE(review): this assumes the series starts in January - confirm with
    # callers. Shorter series fall back to the last known interval.
    last_dt = dt[6] if np.size(dt) > 6 else dt[-1]
    dt = np.append(dt, last_dt)

    ds['time'] = times + (dt / 2)
    return ds


def postprocess_ccam(ds, **kwargs):
    """For CORDEX processing, there is some minor postprocessing that happens.

    Removes surplus global attributes, strips extra dimensions from the
    lat/lon bounds variables and, when resampling was applied, centers the
    time axis.

    Args:
        ds (xarray.Dataset): Data.
        **kwargs: Keyword options. The following keys are read:
            variable (str): Variable currently being processed.
            resampling_applied (bool): Whether resampling was applied; time
                centering only happens when this equals ``True``.
            output_frequency (str, optional): Output frequency code handed to
                ``center_times``. When absent, the constant-interval logic is
                used.

    Returns:
        xarray.Dataset: Data with postprocessing applied.
    """
    logger = au.get_logger(__name__)

    # Strip out the extra metadata keys (Marcus 20220802)
    remove_keys = (
        'ensemble',
        'rcm_institute',
        'rcm_model_cordex',
        'rcm_model',
        'rcm_version_cordex',
    )
    for rk in remove_keys:
        if rk in ds.attrs:
            logger.debug(f'Removing metadata key {rk}')
            ds.attrs.pop(rk)

    # Strip out the extra dimensions from bnds (reduces filesize considerably)
    if 'lat_bnds' in ds.data_vars.keys():
        # Drop surplus coordinates
        ds['lat_bnds'] = au.isolate_coordinate(ds.lat_bnds, 'lat', drop=True)
        ds['lon_bnds'] = au.isolate_coordinate(ds.lon_bnds, 'lon', drop=True)

    # Center the times for non-instantaneous data.
    # NOTE(review): the instantaneous flag is only logged; the decision below
    # depends solely on resampling_applied - confirm this is intended.
    _is_instantaneous = is_instantaneous(ds, kwargs['variable'])
    _resampling_applied = kwargs['resampling_applied']

    logger.debug(f'is_instantaneous = {_is_instantaneous}')
    logger.debug(f'resampling_applied = {_resampling_applied}')

    # Equality (not truthiness) is kept so non-boolean values behave as before.
    if _resampling_applied == True:  # noqa: E712
        logger.debug('TIME CENTERING TRIGGERED')
        # Pass the caller's frequency through; a literal list was passed here
        # previously, which meant the monthly logic could never be selected.
        ds = center_times(ds, output_frequency=kwargs.get('output_frequency'))

    return ds


def is_instantaneous(ds, variable):
    """Checks for the presence of CCAM-specific flags indicating that a variable is instantaneous.

    A variable is treated as instantaneous when it has no ``cell_methods``
    attribute, or when that attribute contains ``time: point``. A containment
    check is used so composite values such as ``'area: mean time: point'``
    are recognised as well.

    Args:
        ds (xarray.Dataset): Data.
        variable (str): Variable currently being processed.

    Returns:
        bool: True if the variable is considered instantaneous, False
            otherwise.
    """
    attrs = ds[variable].attrs

    # if cell_methods is missing
    if 'cell_methods' not in attrs:
        return True

    # time: point is present (str() guards against non-string attribute values)
    return 'time: point' in str(attrs['cell_methods'])