global_attributes

Classes#

GlobalAttributes #

Bases: AttributeModel

Global attributes that will be recorded in the output dataset. These metadata are used to record data provenance information (e.g., location, institution, etc.), to construct datastream and file names (i.e., the location_id, dataset_name, qualifier, temporal, and data_level attributes), and to provide metadata that is useful for data users (e.g., title, description, etc.).

Attributes#

Conventions `class-attribute` `instance-attribute` #

Conventions: Optional[StrictStr] = Field(
    default="CF-1.6",
    description="A comma-separated list of the conventions that are followed by the dataset.",
)

code_url `class-attribute` `instance-attribute` #

code_url: Optional[HttpUrl] = Field(
    default=None, description="Where the code is hosted."
)

code_version `class-attribute` `instance-attribute` #

# Version of the code that produced the dataset; the default comes from
# calling get_code_version (default_factory) when no value is supplied.
# The warn_if_dynamic_properties_are_set validator is registered for this
# field with pre=True: it logs a warning and returns "" for any truthy value
# that is passed in explicitly.
code_version: StrictStr = Field(
    default_factory=get_code_version,
    description="Attribute that will be recorded automatically by the pipeline. A warning will be raised if this is set in the config file. The code_version attribute reads the 'CODE_VERSION' environment variable or parses the git history to determine the version of the code. Semantic versioning is used by default (v'major.minor.micro'; e.g., 1.2.3).",
)

data_level `class-attribute` `instance-attribute` #

# Required processing-level label, 2 to 3 characters long, restricted to
# lowercase letters and digits by the regex.
# NOTE(review): the description asks for "a letter followed by a number", but
# the regex accepts the characters in any order (e.g. "1a" or "abc" would
# match) -- confirm whether the looser pattern is intentional.
data_level: str = Field(
    min_length=2,
    max_length=3,
    regex="^[a-z0-9]+$",
    description="A string used to indicate the level of processing of the output data. It should be formatted as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
)

dataset_name `class-attribute` `instance-attribute` #

dataset_name: str = Field(
    min_length=2,
    regex="^[a-z0-9_]+$",
    description="A string used to identify the data being produced. Ideally resembles a shortened lowercase version of the title. Only lowercase alphanumeric characters and '_' are allowed.",
)

datastream `class-attribute` `instance-attribute` #

# Label identifying this data product. The default is the empty string, which
# is falsy: the add_datastream_field root validator checks for a falsy value
# and, if so, replaces it with the result of get_datastream(**values).
# A non-empty value supplied by the user is left untouched by that validator.
datastream: StrictStr = Field(
    "",
    description="Typically used as a label that uniquely identifies this data product from any other data product. For file-based storage systems, the datastream attribute is typically used to generate directory structures as f'{location_id}/{datastream}/', with files in that directory typically named as f'{datastream}.{date}.{time}.{ext}'. This attribute is AUTO-GENERATED at run-time, unless it is explicitly set in the config file, in which case the value in the config file will override the default. The default value for 'datastream' is as follows:\n f\"{location_id}.{dataset_name}{_qualifier}{_temporal}.{data_level}\", \nwhere '_qualifier' and '_temporal' are both prepended with a literal '-' character if they are provided. This gives some separation between the 'dataset_name', 'qualifier', and 'temporal' attributes and makes it possible to parse out these specific attributes given a complete datastream label.",
)

description `class-attribute` `instance-attribute` #

description: str = Field(
    min_length=1,
    description="A user-friendly description of the dataset. It should provide enough context about the data for new users to quickly understand how the data can be used.",
)

doi `class-attribute` `instance-attribute` #

doi: Optional[StrictStr] = Field(
    title="DOI",
    default=None,
    description="The DOI that has been registered for this dataset, if applicable.",
)

featureType `class-attribute` `instance-attribute` #

featureType: Optional[StrictStr] = Field(
    title="Feature Type",
    default="timeSeries",
    description="CF attribute for identifying the featureType.",
)

history `class-attribute` `instance-attribute` #

# Provenance string that is not meant to be set by the user; defaults to "".
# The warn_if_dynamic_properties_are_set validator is registered for this
# field with pre=True: it logs a warning for any truthy value passed in
# explicitly and always returns "".
history: StrictStr = Field(
    "",
    description="Attribute that will be recorded automatically by the pipeline. A warning will be raised if this is set in the config file.",
)

institution `class-attribute` `instance-attribute` #

institution: Optional[StrictStr] = Field(
    default=None,
    description="The institution or organization that produces or manages this data.",
)

location_id `class-attribute` `instance-attribute` #

location_id: str = Field(
    min_length=1,
    regex="^[a-zA-Z0-9_]+$",
    description="A label or acronym for the location where the data were obtained from. Only alphanumeric characters and '_' are allowed.",
)

qualifier `class-attribute` `instance-attribute` #

qualifier: Optional[str] = Field(
    default=None,
    min_length=1,
    regex="^[a-zA-Z0-9_]+$",
    description="An optional string which distinguishes these data from other datasets produced by the same instrument. Only alphanumeric characters and '_' are allowed.",
)

references `class-attribute` `instance-attribute` #

references: Optional[StrictStr] = Field(
    default=None,
    description="Optional attribute used to cite other data, algorithms, etc. as needed.",
)

temporal `class-attribute` `instance-attribute` #

# Optional temporal-resolution label (default None): at least 2 characters,
# one or more digits followed by one or more letters (e.g. '10m').
# NOTE(review): the regex accepts uppercase letters ([a-zA-Z]) although the
# description states that only lowercase alphanumeric characters are allowed
# -- confirm which of the two reflects the intended rule.
temporal: Optional[str] = Field(
    default=None,
    min_length=2,
    regex="^[0-9]+[a-zA-Z]+$",
    description="An optional string which describes the temporal resolution of the data (if spaced in regular intervals). This string should be formatted as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only lowercase alphanumeric characters are allowed.",
)

title `class-attribute` `instance-attribute` #

title: str = Field(
    min_length=1,
    description="A succinct description of the dataset. This value may be similar to a publication title and should be suitable for use as a title in plots or other references to this dataset.",
)

Functions#

add_datastream_field #

add_datastream_field(
    values: Dict[str, StrictStr]
) -> Dict[str, StrictStr]

Source code in tsdat/config/attributes/global_attributes.py

@root_validator(skip_on_failure=True)
def add_datastream_field(cls, values: Dict[str, StrictStr]) -> Dict[str, StrictStr]:
    """Fill in the ``datastream`` entry when it has not been provided.

    Args:
        values: Mapping of field names to their values. It must contain a
            ``datastream`` key.

    Returns:
        The same mapping. If ``values["datastream"]`` was falsy on entry, it
        is replaced in place with the result of ``get_datastream(**values)``;
        otherwise the mapping is returned unchanged.
    """
    # An explicitly provided (truthy) datastream always wins.
    if values["datastream"]:
        return values

    values["datastream"] = get_datastream(**values)
    return values

warn_if_dynamic_properties_are_set #

warn_if_dynamic_properties_are_set(
    v: str, field: ModelField
) -> str

Source code in tsdat/config/attributes/global_attributes.py

@validator("history", "code_version", pre=True)
def warn_if_dynamic_properties_are_set(cls, v: str, field: ModelField) -> str:
    """Discard user-supplied values for the ``history`` and ``code_version`` fields.

    Args:
        v: The raw value supplied for the field.
        field: The field being validated; its ``name`` is used in the warning.

    Returns:
        Always the empty string. A warning is logged first when ``v`` is truthy.
    """
    # Nothing was supplied (or it was empty): no warning needed.
    if not v:
        return ""

    message = (
        f"The '{field.name}' attribute should not be set explicitly. The"
        f" current value of '{v}' will be ignored."
    )
    logger.warning(message)
    return ""

global_attributes

Classes#

GlobalAttributes #

Attributes#

Conventions class-attribute instance-attribute #

code_url class-attribute instance-attribute #

code_version class-attribute instance-attribute #

data_level class-attribute instance-attribute #

dataset_name class-attribute instance-attribute #

datastream class-attribute instance-attribute #

description class-attribute instance-attribute #

doi class-attribute instance-attribute #

featureType class-attribute instance-attribute #

history class-attribute instance-attribute #

institution class-attribute instance-attribute #

location_id class-attribute instance-attribute #

qualifier class-attribute instance-attribute #

references class-attribute instance-attribute #

temporal class-attribute instance-attribute #

title class-attribute instance-attribute #

Functions#

add_datastream_field #

warn_if_dynamic_properties_are_set #

Functions#

Modules#

Conventions `class-attribute` `instance-attribute` #

code_url `class-attribute` `instance-attribute` #

code_version `class-attribute` `instance-attribute` #

data_level `class-attribute` `instance-attribute` #

dataset_name `class-attribute` `instance-attribute` #

datastream `class-attribute` `instance-attribute` #

description `class-attribute` `instance-attribute` #

doi `class-attribute` `instance-attribute` #

featureType `class-attribute` `instance-attribute` #

history `class-attribute` `instance-attribute` #

institution `class-attribute` `instance-attribute` #

location_id `class-attribute` `instance-attribute` #

qualifier `class-attribute` `instance-attribute` #

references `class-attribute` `instance-attribute` #

temporal `class-attribute` `instance-attribute` #

title `class-attribute` `instance-attribute` #