Skip to content

dataset

Classes#

ACDDDatasetConfig #

Bases: DatasetConfig

Attributes#

attrs class-attribute instance-attribute #
# Dataset-level (global) attributes, typed as ACDDGlobalAttrs. The Ellipsis
# passed as the first argument to Field means no default value is supplied.
attrs: ACDDGlobalAttrs = Field(
    ...,
    description="Attributes that pertain to the dataset as a whole (as opposed to attributes that are specific to individual variables).",
)

DatasetConfig #

Bases: YamlModel


Defines the structure and metadata of the dataset produced by a tsdat pipeline.

Also provides methods to support YAML parsing and validation, including generation of a JSON schema.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| attrs | GlobalAttributes | Attributes that pertain to the dataset as a whole. | required |
| coords | Dict[str, Coordinate] | The dataset's coordinate variables. | required |
| data_vars | Dict[str, Variable] | The dataset's data variables. | required |

Attributes#

attrs class-attribute instance-attribute #
# Dataset-level (global) attributes, typed as GlobalAttributes. No default
# value is passed to Field.
attrs: GlobalAttributes = Field(
    description="Attributes that pertain to the dataset as a whole (as opposed to attributes that are specific to individual variables)."
)
coords class-attribute instance-attribute #
# Mapping of coordinate-variable name to its Coordinate definition. No default
# value is passed to Field; the description below is the text attached to the
# field.
coords: Dict[str, Coordinate] = Field(
    description="This section defines the coordinate variables that the rest of the data are dimensioned by. Coordinate variable data can either be retrieved from an input data source or defined statically via the 'data' property. Note that tsdat requires the dataset at least be dimensioned by a 'time' variable. Most datasets will only need the 'time' coordinate variable, but multidimensional datasets (e.g., ADCP or Lidar data (time, height)) are well-supported. Note that the 'dims' attribute is still required for coordinate variables, and that this value should be [<name>], where <name> is the name of the coord (e.g., 'time')."
)
data_vars class-attribute instance-attribute #
# Mapping of data-variable name to its Variable definition. No default value
# is passed to Field.
data_vars: Dict[str, Variable] = Field(
    description="This section defines the data variables that the output dataset will contain. Variable data can either be retrieved from an input data source, defined statically via the 'data' property, or initialized to missing and set dynamically via user code in a tsdat pipeline."
)

Functions#

set_variable_name_property classmethod #
set_variable_name_property(
    vars: Dict[str, Dict[str, Any]]
) -> Dict[str, Dict[str, Any]]
Source code in tsdat/config/dataset.py
@validator("coords", "data_vars", pre=True)
@classmethod
def set_variable_name_property(
    cls, vars: Dict[str, Dict[str, Any]]
) -> Dict[str, Dict[str, Any]]:
    """Store each variable's mapping key inside its own definition.

    Every definition dict in ``vars`` gets a ``"name"`` entry equal to the key
    it is stored under, overwriting any existing ``"name"`` value.

    Args:
        vars: Mapping of variable name to that variable's raw definition dict.

    Returns:
        The same ``vars`` mapping, mutated in place.
    """
    for var_name, definition in vars.items():
        definition["name"] = var_name
    return vars
time_in_coords classmethod #
time_in_coords(
    coords: Dict[str, Coordinate]
) -> Dict[str, Coordinate]
Source code in tsdat/config/dataset.py
@validator("coords")
@classmethod
def time_in_coords(cls, coords: Dict[str, Coordinate]) -> Dict[str, Coordinate]:
    """Require that a ``"time"`` entry is present among the coordinates.

    Args:
        coords: Mapping of coordinate name to coordinate definition.

    Returns:
        ``coords`` unchanged when it contains the key ``"time"``.

    Raises:
        ValueError: If ``"time"`` is not a key of ``coords``.
    """
    if "time" in coords:
        return coords
    raise ValueError("Required coordinate definition 'time' is missing.")
validate_variable_name_uniqueness classmethod #
validate_variable_name_uniqueness(values: Any) -> Any
Source code in tsdat/config/dataset.py
@root_validator(skip_on_failure=True)
@classmethod
def validate_variable_name_uniqueness(cls, values: Any) -> Any:
    """Reject configurations that reuse a name for a coord and a data_var.

    Args:
        values: Mapping holding the ``"coords"`` and ``"data_vars"`` mappings.

    Returns:
        ``values`` unchanged when the two key sets are disjoint.

    Raises:
        ValueError: If any name is a key of both ``values["coords"]`` and
            ``values["data_vars"]``; the message lists the names sorted.
    """
    shared_names = values["coords"].keys() & values["data_vars"].keys()
    if not shared_names:
        return values
    raise ValueError(
        f"Variables cannot be both coords and data_vars: {sorted(shared_names)}."
    )
variable_names_are_legal(
    vars: Dict[str, Variable], field: ModelField
) -> Dict[str, Variable]
Source code in tsdat/config/dataset.py
@validator("coords", "data_vars")
def variable_names_are_legal(
    cls, vars: Dict[str, Variable], field: ModelField
) -> Dict[str, Variable]:
    """Check that every variable name uses only the permitted characters.

    A name is legal when it is non-empty and consists solely of ASCII letters,
    digits, underscores, and the characters ``( ) / [ ] { } .``.

    Args:
        vars: Mapping of variable name to variable definition.
        field: The field being validated; its ``name`` is used in the error
            message.

    Returns:
        ``vars`` unchanged when every key is legal.

    Raises:
        ValueError: On the first key that is not fully matched by the pattern.
    """
    # Compiled once up front: the pattern does not depend on the name checked.
    pattern = re.compile(r"^[a-zA-Z0-9_\(\)\/\[\]\{\}\.]+$")
    for name in vars:
        # fullmatch, not match: "$" also matches just before a trailing
        # newline, so match() would accept a name such as "time\n".
        if not pattern.fullmatch(name):
            raise ValueError(
                f"'{name}' is not a valid '{field.name}' name. It must be a value"
                f" matched by {pattern}."
            )
    return vars

IOOSDatasetConfig #

Bases: DatasetConfig

Attributes#

attrs class-attribute instance-attribute #
# Dataset-level (global) attributes, typed as IOOSGlobalAttrs. No default
# value is passed to Field.
attrs: IOOSGlobalAttrs = Field(
    description="Attributes that pertain to the dataset as a whole (as opposed to attributes that are specific to individual variables)."
)