Source code for tsdat.tstring

from __future__ import annotations

import re
from typing import Callable, Dict, Mapping, Match, Union

__all__ = ("Template",)

_SQUARE_BRACKET_REGEX = r"\[(.*?)\]"
_CURLY_BRACKET_REGEX = r"\{(.*?)\}"


def _substitute(
    template: str,
    mapping: Mapping[str, str | Callable[[], str] | None] | None = None,
    allow_missing: bool = False,
    **kwds: str | Callable[[], str] | None,
) -> str:
    """Substitutes variables in a template string.

    The template string is expected to be formatted in the same way as python f-strings,
    with variables that should be substituted wrapped in curly brances `{}`.
    Additionally, square brackets may be used around curly brackets and other text to
    mark that substitution as optional -- i.e. if the variable cannot be found then the
    text wrapped in the square brackets will be removed.

    Examples:

        `mapping = dict(a="x", b="y", c="z")`

        `substitute("{a}.{b}{c}w", mapping) == "x.yzw"  # True`

        `substitute("{a}.{b}[.{c}]", mapping) == "x.y.z"  # True`

        `substitute("{a}.{b}[.{d}]", mapping) == "x.y"  # True`

        `substitute("{a}.{b}.{d}", mapping, True) == "x.y.{d}"  # True`

        `substitute("{a}.{b}.{d}", mapping, False)  # raises ValueError

    Args:
        template (str): The template string. Variables to substitute should be wrapped
            by curly braces `{}`.
        mapping (Mapping[str, str | Callable[[], str] | None] | None): A key-value pair
            of variable name to the value to replace it with. If the value is a
            string it is dropped-in directly. If it is a no-argument callable the
            return value of the callable is used. If it is None, then it is treated
            as missing and the action taken depends on the `allow_missing` parameter.
        allow_missing (bool, optional): Allow variables outside of square brackets to be
            missing, in which case they are left as-is, including the curly brackets.
            This is intended to allow users to perform some variable substitutions
            before all variables in the mapping are known. Defaults to False.
        **kwds (str | Callable[[], str] | None): Optional extras to be merged into the
            mapping dict. If a keyword passed here has the same name as a key in the
            mapping dict, the value here would be used instead.

    Raises:
        ValueError: If the substitutions cannot be made due to missing variables.

    Returns:
        str: The template string with the appropriate substitutions made.
    """
    if mapping is None:
        mapping = {}
    mapping = {**mapping, **kwds}

    def _sub_curly(match: Match[str]) -> str:
        # group(1) returns string without {}, group(0) returns with {}
        # result is we only do replacements that we can actually do.
        res = mapping.get(match.group(1))
        if callable(res):
            res = res()
        if allow_missing and res is None:
            res = match.group(0)
        elif res is None:
            raise ValueError(f"Substitution cannot be made for key '{match.group(1)}'")
        return res

    def _sub_square(match: Match[str]) -> str:
        # make curly substitutions inside of square brackets or remove the whole thing
        # if substitutions cannot be made.
        try:
            resolved = re.sub(_CURLY_BRACKET_REGEX, _sub_curly, match.group(1))
            return resolved if resolved != match.group(1) else ""
        except ValueError:
            return ""

    squared = re.sub(_SQUARE_BRACKET_REGEX, _sub_square, template)
    resolved = re.sub(_CURLY_BRACKET_REGEX, _sub_curly, squared)

    return resolved


def _generate_regex(template: str) -> str:
    """Generates a regex pattern which can be used to extract the values substituted
    into a template string.

    Args:
        template (str): The template string to generate a regex pattern for.

    Returns:
        str: The regex pattern with named groups according to the template.
    """
    regex_pattern = "^"

    i = 0
    while i < len(template):
        char = template[i]
        if char == "{":
            var_start = i + 1
            var_end = template.index("}", var_start)
            var_name = template[var_start:var_end]
            regex_pattern += f"(?P<{var_name}>[_a-zA-Z0-9]+)"
            i = var_end + 1
        elif char == "[":
            regex_pattern += "(?:"
            i += 1
        elif char == "]":
            regex_pattern += ")?"
            i += 1
        else:
            regex_pattern += re.escape(char)
            i += 1

    regex_pattern += "$"
    return regex_pattern


# def _generate_regex(template: str) -> str:
#     regex_pattern = "^"
#     i = 0
#     while i < len(template):
#         char = template[i]
#         if char == "{":
#             var_start = i + 1
#             var_end = template.index("}", var_start)
#             var_name = template[var_start:var_end]
#             regex_pattern += f"(?P<{var_name}>[^.*]+)?"
#             i = var_end + 1
#         elif char == "[":
#             regex_pattern += "(?:"
#             i += 1
#         elif char == "]":
#             regex_pattern += ")?"
#             i += 1
#         else:
#             regex_pattern += re.escape(char)
#             i += 1
#     regex_pattern += "$"
#     return regex_pattern


# def _generate_regex(template: str) -> str:
#     regex_pattern = "^"
#     is_optional = False
#     optional_part = ""

#     i = 0
#     while i < len(template):
#         char = template[i]
#         if char == "{":
#             var_start = i + 1
#             var_end = template.index("}", var_start)
#             var_name = template[var_start:var_end]

#             if is_optional:
#                 # regex_pattern += f"(?:{re.escape(optional_part)}(?P<{var_name}>[^.]*))?"
#                 regex_pattern += f"(?:{re.escape(optional_part)}(?P<{var_name}>.*?))?"
#                 optional_part = ""
#             else:
#                 regex_pattern += f"(?P<{var_name}>[^.]*)"

#             i = var_end + 1
#         elif char == "[":
#             is_optional = True
#             optional_part = ""
#             i += 1
#         elif char == "]":
#             is_optional = False
#             regex_pattern += re.escape(optional_part) + ")?"
#             optional_part = ""
#             i += 1
#         else:
#             if is_optional:
#                 optional_part += char
#             else:
#                 regex_pattern += re.escape(char)
#             i += 1

#     regex_pattern += "$"
#     return regex_pattern


# def _generate_regex(template: str) -> str:
#     regex_pattern = "^"
#     is_optional = False
#     optional_part = ""

#     i = 0
#     while i < len(template):
#         char = template[i]
#         if char == "{":
#             var_start = i + 1
#             var_end = template.index("}", var_start)
#             var_name = template[var_start:var_end]

#             if is_optional:
#                 regex_pattern += f"(?:{re.escape(optional_part)}(?P<{var_name}>.*?))?"
#                 optional_part = ""
#             else:
#                 regex_pattern += f"(?P<{var_name}>.*?)"

#             i = var_end + 1
#         elif char == "[":
#             is_optional = True
#             optional_part = ""
#             i += 1
#         elif char == "]":
#             is_optional = False
#             if optional_part:
#                 regex_pattern += f"(?:{re.escape(optional_part)})?"
#             optional_part = ""
#             i += 1
#         else:
#             if is_optional:
#                 optional_part += char
#             else:
#                 regex_pattern += re.escape(char)
#             i += 1

#     regex_pattern += "$"
#     return regex_pattern


[docs]class Template: """Python f-string implementation with lazy and optional variable substitutions. The template string is expected to be formatted in the same way as python f-strings, with variables that should be substituted wrapped in curly braces `{}`. Additionally, square brackets may be used around curly brackets and other text to mark that substitution as optional -- i.e. if the variable cannot be found then the text wrapped in the square brackets will be removed. Examples: `mapping = dict(a="x", b="y", c="z")` `TemplateString("{a}.{b}{c}w").substitute(mapping) # -> "x.yzw"` `TemplateString("{a}.{b}[.{c}]").substitute(mapping) # -> "x.y.z"` `TemplateString("{a}.{b}.{d}").substitute(mapping) # raises ValueError` `TemplateString("{a}.{b}[.{d}]").substitute(mapping) # -> "x.y"` `TemplateString("{a}.{b}.{d}").substitute(mapping, True) # -> "x.y.{d}"` Args: template (str): The template string. Variables to substitute should be wrapped by curly braces `{}`. regex (str | None, optional): A regex pattern used to extract the substitutions used to create a formatted string with this template. Generated automatically if not provided. """ def __init__(self, template: str, regex: str | None = None) -> None: """""" if not self._is_balanced(template): raise ValueError(f"Unbalanced brackets in template string: '{template}'") self.template = template self.regex = regex or _generate_regex(template)
[docs] def __repr__(self) -> str: return f"{self.__class__.__name__}({self.template!r})"
[docs] def __str__(self) -> str: return self.template
@classmethod def _is_balanced(cls, template: str): stack: list[str] = [] for char in template: if char in "{[": stack.append("}" if char == "{" else "]") elif char in "}]": if not stack or char != stack.pop(): return False return len(stack) == 0
[docs] def substitute( self, mapping: Mapping[str, str | Callable[[], str] | None] | None = None, allow_missing: bool = False, **kwds: str | Callable[[], str] | None, ) -> str: """Substitutes variables in a template string. Args: mapping (Mapping[str, str | Callable[[], str] | None] | None): A key-value pair of variable name to the value to replace it with. If the value is a string it is dropped-in directly. If it is a no-argument callable the return value of the callable is used. If it is None, then it is treated as missing and the action taken depends on the `allow_missing` parameter. allow_missing (bool, optional): Allow variables outside of square brackets to be missing, in which case they are left as-is, including the curly brackets. This is intended to allow users to perform some variable substitutions before all variables in the mapping are known. Defaults to False. **kwds (str | Callable[[], str] | None): Optional extras to be merged into the mapping dict. If a keyword passed here has the same name as a key in the mapping dict, the value here would be used instead. Raises: ValueError: If the substitutions cannot be made due to missing variables. Returns: str: The template string with the appropriate substitutions made. """ return _substitute(self.template, mapping, allow_missing, **kwds)
[docs] def extract_substitutions(self, formatted_str: str) -> dict[str, str] | None: """Extracts the substitutions used to create the provided formatted string. Note that this is not guaranteed to return accurate results if the template is constructed such that separators between variables are ambiguous. Args: formatted_str (str): The formatted string Returns: dict[str, str]: A dictionary mapping each matched template variable to its value in the formatted string. Returns None if there are no matches. """ match = re.match(self.regex, formatted_str) if match: return match.groupdict() else: return None