# Source code for hermes_core.util.schema
"""
This module provides schema metadata derivations.
"""
from pathlib import Path
from typing import Optional
from swxsoc.util.schema import SWXSchema
import hermes_core
# Names exported by `from hermes_core.util.schema import *`.
__all__ = ["HermesDataSchema"]
# File names (not full paths) of the default global and variable attribute schema
# files. `HermesDataSchema.__init__` resolves them against the `data` directory that
# sits next to the `hermes_core` package's `__file__`.
DEFAULT_GLOBAL_CDF_ATTRS_SCHEMA_FILE = "hermes_default_global_cdf_attrs_schema.yaml"
DEFAULT_VARIABLE_CDF_ATTRS_SCHEMA_FILE = "hermes_default_variable_cdf_attrs_schema.yaml"
# [docs]
class HermesDataSchema(SWXSchema):
    """
    Class representing a schema for data requirements and formatting, specific to the
    HERMES Mission.

    There are two main components to the HERMES Data Schema, including both global and
    variable attribute information.

    Global schema information is loaded from YAML (dict-like) files in the following format:

    .. code-block:: yaml

        attribute_name:
            description: >
                Include a meaningful description of the attribute and context needed to understand
                its values.
            default: <string> # A default value for the attribute if needed/desired
            derived: <bool> # Whether or not the attribute's value can be derived using a python function
            derivation_fn: <string> # The name of a Python function to derive the value. Must be a function member of the schema class and match the signature below.
            required: <bool> # Whether the attribute is required
            validate: <bool> # Whether the attribute should be validated by the Validation module
            overwrite: <bool> # Whether an existing value for the attribute should be overwritten if a different value is derived.

    The signature for all functions to derive global attributes should follow the format below.
    The function takes in a parameter `data` which is a `HermesData` object, or that of an
    extended data class, and returns a single attribute value for the given attribute to be
    derived.

    .. code-block:: python

        def derivation_fn(self, data: HermesData):
            # ... do manipulations as needed from `data`
            return "attribute_value"

    Variable schema information is loaded from YAML (dict-like) files in the following format:

    .. code-block:: yaml

        attribute_key:
            attribute_name:
                description: >
                    Include a meaningful description of the attribute and context needed to understand
                    its values.
                derived: <bool> # Whether or not the attribute's value can be derived using a python function
                derivation_fn: <string> # The name of a Python function to derive the value. Must be a function member of the schema class and match the signature below.
                required: <bool> # Whether the attribute is required
                validate: <bool> # Whether the attribute should be validated by the Validation module
                overwrite: <bool> # Whether an existing value for the attribute should be overwritten if a different value is derived.
                valid_values: <list> # A list of valid values that the attribute can take. The value of the attribute is checked against the `valid_values` in the Validation module.
                alternate: <string> # An additional attribute name that can be treated as an alternative of the given attribute.
        data:
            - attribute_name
            - ...
        support_data:
            - ...
        metadata:
            - ...

    The signature for all functions to derive variable attributes should follow the format below.
    The function takes in parameters `var_name`, `var_data`, and `guess_type`, where:

    - `var_name` is the variable name of the variable for which the attribute is being derived
    - `var_data` is the variable data of the variable for which the attribute is being derived
    - `guess_type` is the guessed CDF variable type of the data for which the attribute is being derived.

    The function must return a single attribute value for the given attribute to be derived.

    .. code-block:: python

        def derivation_fn(self, var_name: str, var_data: Union[Quantity, NDData, NDCube], guess_type: ctypes.c_long):
            # ... do manipulations as needed from data
            return "attribute_value"

    Parameters
    ----------
    global_schema_layers : `Optional[list[Path]]`
        Absolute file paths to global attribute schema files. These schema files are layered
        on top of one another in a latest-priority ordering. That is, the latest file that modifies
        a common schema attribute will take precedence over earlier values for a given attribute.
    variable_schema_layers : `Optional[list[Path]]`
        Absolute file paths to variable attribute schema files. These schema files are layered
        on top of one another in a latest-priority ordering. That is, the latest file that modifies
        a common schema attribute will take precedence over earlier values for a given attribute.
    use_defaults: `Optional[bool]`
        Whether or not to load the default global and variable attribute schema files. These
        default schema files contain only the requirements for CDF ISTP validation.
    """

    def __init__(
        self,
        global_schema_layers: Optional[list[Path]] = None,
        variable_schema_layers: Optional[list[Path]] = None,
        use_defaults: Optional[bool] = True,
    ):
        # Both HERMES default schema files live in the `data` directory that sits
        # next to the `hermes_core` package's `__file__`.
        package_data_dir = Path(hermes_core.__file__).parent / "data"
        default_global_layer = str(
            package_data_dir / DEFAULT_GLOBAL_CDF_ATTRS_SCHEMA_FILE
        )
        default_variable_layer = str(
            package_data_dir / DEFAULT_VARIABLE_CDF_ATTRS_SCHEMA_FILE
        )

        # When defaults are requested they go first in each layer list; otherwise
        # each list starts out empty.
        merged_global_layers = [default_global_layer] if use_defaults else []
        merged_variable_layers = [default_variable_layer] if use_defaults else []

        # Caller-supplied layers are appended after the defaults, in the order given.
        # Fresh lists are extended, so the caller's lists are never mutated.
        for merged_layers, custom_layers in (
            (merged_global_layers, global_schema_layers),
            (merged_variable_layers, variable_schema_layers),
        ):
            if custom_layers is None or len(custom_layers) == 0:
                continue
            merged_layers.extend(custom_layers)

        # Hand the assembled layer lists to the SWxSOC base class to populate the schema.
        super().__init__(
            global_schema_layers=merged_global_layers,
            variable_schema_layers=merged_variable_layers,
            use_defaults=use_defaults,
        )

    def _get_hermes_version(self, data):
        """
        Derive the `hermes_version` attribute for `data`.

        Returns the value already stored under `"hermes_version"` in `data.meta` when
        that key is present and its value is truthy; otherwise returns
        `hermes_core.__version__`.
        """
        attr_name = "hermes_version"
        # An existing, non-empty value wins over the installed package version.
        if attr_name in data.meta and data.meta[attr_name]:
            return data.meta[attr_name]
        return hermes_core.__version__