CatalogConfigResolver

kedro.io.CatalogConfigResolver ¶

CatalogConfigResolver(config=None, credentials=None, default_runtime_patterns=None)

Resolves dataset configurations based on dataset factory patterns and credentials.

The CatalogConfigResolver is responsible for managing dataset factory patterns, resolving credentials, and dynamically generating dataset configurations. It supports advanced features like pattern matching, user-set catch-all patterns, and runtime patterns, enabling flexible and reusable dataset configurations.

Attributes:

_default_runtime_patterns (Patterns) –

Runtime patterns used for resolving datasets.
_dataset_patterns (dict) –

Sorted dataset factory patterns extracted from the catalog configuration.
_user_catch_all_pattern (dict) –

User provided catch all pattern.
_resolved_configs (dict) –

Resolved dataset configurations with credentials applied.

Example: ::

>>> from kedro.io.catalog_config_resolver import CatalogConfigResolver
>>> config = {
...     "{namespace}.int_{name}": {
...         "type": "pandas.CSVDataset",
...         "filepath": "{name}.csv",
...         "credentials": "db_credentials",
...     }
... }
>>> credentials = {"db_credentials": {"user": "username", "pass": "pass"}}
>>> resolver = CatalogConfigResolver(config=config, credentials=credentials)
>>> resolved_config = resolver.resolve_pattern("data.int_customers")
>>> print(resolved_config)
# {'type': 'pandas.CSVDataset', 'filepath': 'customers.csv', 'credentials': {'user': 'username', 'pass': 'pass'}}

Parameters:

config (dict[str, dict[str, Any]] | None, default: None ) –

Dataset configurations from the catalog.
credentials (dict[str, dict[str, Any]] | None, default: None ) –

Credentials for datasets.
default_runtime_patterns (Patterns | None, default: None ) –

Runtime patterns for resolving datasets.

Example: ::

>>> from kedro.io.catalog_config_resolver import CatalogConfigResolver
>>> config = {
...     "{namespace}.int_{name}": {
...         "type": "pandas.CSVDataset",
...         "filepath": "{name}.csv",
...     }
... }
>>> resolver = CatalogConfigResolver(config=config)
>>> resolver._dataset_patterns
# {'{namespace}.int_{name}': {'type': 'pandas.CSVDataset', 'filepath': '{name}.csv'}}

Source code in kedro/io/catalog_config_resolver.py

def __init__(
    self,
    config: dict[str, dict[str, Any]] | None = None,
    credentials: dict[str, dict[str, Any]] | None = None,
    default_runtime_patterns: Patterns | None = None,
):
    """
    Build a `CatalogConfigResolver` from catalog configuration and credentials.

    The constructor stores the runtime patterns, asks `_extract_patterns` for
    the dataset patterns and the user catch-all pattern, and asks
    `_resolve_credentials` for the resolved dataset configurations.

    Args:
        config: Dataset configurations from the catalog.
        credentials: Credentials for datasets.
        default_runtime_patterns: Runtime patterns for resolving datasets.
            When `None`, a warning is logged; any falsy value is replaced by
            `DEFAULT_RUNTIME_PATTERN`.

    Example:
    ::

        >>> from kedro.io.catalog_config_resolver import CatalogConfigResolver
        >>> config = {
        ...     "{namespace}.int_{name}": {
        ...         "type": "pandas.CSVDataset",
        ...         "filepath": "{name}.csv",
        ...     }
        ... }
        >>> resolver = CatalogConfigResolver(config=config)
        >>> resolver._dataset_patterns
        # {'{namespace}.int_{name}': {'type': 'pandas.CSVDataset', 'filepath': '{name}.csv'}}
    """
    runtime_patterns_missing = default_runtime_patterns is None
    if runtime_patterns_missing:
        self._logger.warning(
            f"Since runtime patterns are not provided, setting "
            f"the runtime pattern to default value: {DEFAULT_RUNTIME_PATTERN}"
        )
    # `or` (not an `is None` test): an empty mapping also falls back to the default.
    self._default_runtime_patterns = default_runtime_patterns or DEFAULT_RUNTIME_PATTERN

    extracted = self._extract_patterns(config, credentials)
    self._dataset_patterns, self._user_catch_all_pattern = extracted
    self._resolved_configs = self._resolve_credentials(config, credentials)

_default_runtime_patterns `instance-attribute` ¶

_default_runtime_patterns = default_runtime_patterns or DEFAULT_RUNTIME_PATTERN

_logger `property` ¶

_logger

_resolved_configs `instance-attribute` ¶

_resolved_configs = _resolve_credentials(config, credentials)

config `property` ¶

config

Get the resolved dataset configurations.

Returns:

dict[str, dict[str, Any]] –

Resolved dataset configurations.

Example: ::

>>> from kedro.io.catalog_config_resolver import CatalogConfigResolver
>>> config = {
...     "companies": {
...         "type": "pandas.CSVDataset",
...         "filepath": "companies.csv",
...         "credentials": "db_credentials",
...     }
... }
>>> credentials = {"db_credentials": {"user": "username", "pass": "pass"}}
>>> resolver = CatalogConfigResolver(config, credentials)
>>> print(resolver.config)
# {'companies': {'type': 'pandas.CSVDataset', 'filepath': 'companies.csv', 'credentials': {'user': 'username', 'pass': 'pass'}}}

_extract_patterns `classmethod` ¶

_extract_patterns(config, credentials)

Extract and sort patterns from the catalog configuration.

This method identifies dataset patterns from the catalog configuration and sorts them based on specificity. It also identifies a catch-all pattern (if any) and sets it as the default pattern.

Parameters:

config (dict[str, dict[str, Any]] | None) –

The catalog configuration containing dataset patterns.
credentials (dict[str, dict[str, Any]] | None) –

The credentials for datasets.

Returns:

A tuple containing –
- Sorted dataset patterns.
- The default pattern (if a catch-all pattern is found).

Example: ::

>>> config = {
...     "{namespace}.int_{name}": {"type": "pandas.CSVDataset"},
...     "{name}": {"type": "MemoryDataset"},
... }
>>> sorted_patterns, default_pattern = CatalogConfigResolver._extract_patterns(
...     config, None
... )
>>> print(sorted_patterns.keys())
# dict_keys(['{namespace}.int_{name}'])
>>> print(default_pattern.keys())
# dict_keys(['{name}'])

Source code in kedro/io/catalog_config_resolver.py

@classmethod
def _extract_patterns(
    cls,
    config: dict[str, dict[str, Any]] | None,
    credentials: dict[str, dict[str, Any]] | None,
) -> tuple[Patterns, Patterns]:
    """
    Collect the dataset patterns of a catalog configuration, ordered by specificity.

    Only entries whose name is a pattern (see `is_pattern`) are considered.
    Each one is validated, has its credentials resolved and is then sorted
    with `_sort_patterns`. If the last pattern after sorting has a
    specificity of zero it is treated as the catch-all: it is removed from the
    sorted patterns and returned separately as the default pattern.

    Args:
        config: The catalog configuration containing dataset patterns.
        credentials: The credentials for datasets.

    Returns:
        A tuple containing:
            - Sorted dataset patterns.
            - The default pattern (if a catch-all pattern is found),
              otherwise an empty dictionary.

    Example:
    ::

        >>> config = {
        ...     "{namespace}.int_{name}": {"type": "pandas.CSVDataset"},
        ...     "{name}": {"type": "MemoryDataset"},
        ... }
        >>> sorted_patterns, default_pattern = CatalogConfigResolver._extract_patterns(
        ...     config, None
        ... )
        >>> print(sorted_patterns.keys())
        # dict_keys(['{namespace}.int_{name}'])
        >>> print(default_pattern.keys())
        # dict_keys(['{name}'])
    """
    catalog = config or {}
    creds = credentials or {}

    raw_patterns = {}
    for name, entry in catalog.items():
        if not cls.is_pattern(name):
            continue
        cls._validate_pattern_config(name, entry)
        raw_patterns[name] = _resolve_credentials(entry, creds)

    ordered = cls._sort_patterns(raw_patterns)
    catch_all = {}
    if ordered:
        # Sorting puts the least specific pattern last; specificity 0 means
        # the name consists of placeholders only, i.e. it is a catch-all.
        least_specific = list(ordered.keys())[-1]
        if cls._pattern_specificity(least_specific) == 0:
            key, value = ordered.popitem()
            catch_all = {key: value}

    return ordered, catch_all

_get_matches `classmethod` ¶

_get_matches(pattens, ds_name)

Find all patterns that match a given dataset name.

This method iterates over a collection of patterns and checks if the given dataset name matches any of them using the parse function.

Parameters:

pattens (Iterable[str]) –

A collection of patterns to match against.
ds_name (str) –

The name of the dataset to match.

Returns:

Generator[str] –

A generator yielding patterns that match the dataset name.

Example: ::

>>> patterns = ["{namespace}.int_{name}", "{name}"]
>>> matches = CatalogConfigResolver._get_matches(patterns, "data.int_customers")
>>> print(list(matches))
# ['{namespace}.int_{name}']

Source code in kedro/io/catalog_config_resolver.py

@classmethod
def _get_matches(cls, pattens: Iterable[str], ds_name: str) -> Generator[str]:
    """
    Lazily select the patterns that a dataset name matches.

    Each candidate pattern is tested with `parse(pattern, ds_name)`; the
    pattern is yielded when that call returns a truthy result. Candidates are
    visited in the iteration order of `pattens`.

    Args:
        pattens: A collection of patterns to match against.
        ds_name: The name of the dataset to match.

    Returns:
        A generator yielding patterns that match the dataset name.

    Example:
    ::

        >>> patterns = ["{namespace}.int_{name}", "{name}"]
        >>> matches = CatalogConfigResolver._get_matches(patterns, "data.int_customers")
        >>> print(list(matches))
        # ['{namespace}.int_{name}']
    """
    matching = (candidate for candidate in pattens if parse(candidate, ds_name))
    return matching

_get_pattern_config ¶

_get_pattern_config(pattern)

Retrieve the configuration for a given dataset pattern.

This method searches for the configuration of the specified pattern in the dataset patterns, user-set catch-all patterns, and runtime patterns. If the pattern is not found in any of these, an empty dictionary is returned.

Parameters:

pattern (str) –

The dataset pattern to retrieve the configuration for.

Returns:

dict[str, Any] –

The configuration dictionary for the specified pattern, or an empty dictionary if the pattern is not found.

Example: ::

>>> resolver = CatalogConfigResolver(
...     config={"example": {"type": "MemoryDataset"}}
... )
>>> resolver._get_pattern_config("{default}")
# {'type': 'kedro.io.MemoryDataset'}

Source code in kedro/io/catalog_config_resolver.py

def _get_pattern_config(self, pattern: str) -> dict[str, Any]:
    """
    Look up the configuration registered for a dataset pattern.

    The lookup tries, in order, the dataset patterns, the user-set catch-all
    pattern and the runtime patterns. The first non-empty configuration wins;
    an empty (falsy) entry is treated the same as a missing one.

    Args:
        pattern: The dataset pattern to retrieve the configuration for.

    Returns:
        The configuration dictionary for the specified pattern,
        or an empty dictionary if the pattern is not found.

    Example:
    ::

        >>> resolver = CatalogConfigResolver(
        ...     config={"example": {"type": "MemoryDataset"}}
        ... )
        >>> resolver._get_pattern_config("{default}")
        # {'type': 'kedro.io.MemoryDataset'}
    """
    found = self._dataset_patterns.get(pattern)
    if not found:
        found = self._user_catch_all_pattern.get(pattern)
    if not found:
        found = self._default_runtime_patterns.get(pattern)
    return found or {}

_pattern_specificity `staticmethod` ¶

_pattern_specificity(pattern)

Calculate the specificity of a pattern based on characters outside curly brackets.

Parameters:

pattern (str) –

The pattern to analyze.

Returns:

int –

The number of characters outside curly brackets.

Example: ::

>>> CatalogConfigResolver._pattern_specificity("{namespace}.int_{name}")
# 5

Source code in kedro/io/catalog_config_resolver.py

@staticmethod
def _pattern_specificity(pattern: str) -> int:
    """
    Measure how specific a pattern is by counting its literal characters.

    Everything enclosed in curly brackets (brackets included) is ignored;
    the remaining characters are counted.

    Args:
        pattern: The pattern to analyze.

    Returns:
        The number of characters outside curly brackets.

    Example:
    ::

        >>> CatalogConfigResolver._pattern_specificity("{namespace}.int_{name}")
        # 5
    """
    # Splitting on the placeholders leaves only the literal fragments between them.
    literal_fragments = re.split(r"\{.*?\}", pattern)
    return sum(len(fragment) for fragment in literal_fragments)

_resolve_credentials `classmethod` ¶

_resolve_credentials(config, credentials)

Resolve credentials for datasets in the catalog configuration.

This method replaces credential references in the dataset configuration with the actual credentials from the provided credentials dictionary.

Parameters:

config (dict[str, dict[str, Any]] | None) –

The catalog configuration containing datasets.
credentials (dict[str, dict[str, Any]] | None) –

The credentials for datasets.

Returns:

dict[str, dict[str, Any]] –

The dataset configurations with resolved credentials.

Raises:

DatasetError –

If a dataset configuration is invalid (e.g., not a dictionary).

Example: ::

>>> config = {
...     "example": {
...         "type": "pandas.CSVDataset",
...         "credentials": "db_credentials",
...     }
... }
>>> credentials = {"db_credentials": {"user": "username", "pass": "pass"}}
>>> resolved_configs = CatalogConfigResolver._resolve_credentials(
...     config, credentials
... )
>>> print(resolved_configs)
# {'example': {'type': 'pandas.CSVDataset', 'credentials': {'user': 'username', 'pass': 'pass'}}}

Source code in kedro/io/catalog_config_resolver.py

@classmethod
def _resolve_credentials(
    cls,
    config: dict[str, dict[str, Any]] | None,
    credentials: dict[str, dict[str, Any]] | None,
) -> dict[str, dict[str, Any]]:
    """
    Produce the dataset configurations with credential references resolved.

    Every catalog entry must be a dictionary. Entries whose name is a pattern
    (see `is_pattern`) are skipped here; every other entry is passed, together
    with the credentials, to the module-level `_resolve_credentials` helper and
    the result is stored under the dataset name.

    Args:
        config: The catalog configuration containing datasets.
        credentials: The credentials for datasets.

    Returns:
        The dataset configurations with resolved credentials.

    Raises:
        DatasetError: If a dataset configuration is invalid (e.g., not a dictionary).

    Example:
    ::

        >>> config = {
        ...     "example": {
        ...         "type": "pandas.CSVDataset",
        ...         "credentials": "db_credentials",
        ...     }
        ... }
        >>> credentials = {"db_credentials": {"user": "username", "pass": "pass"}}
        >>> resolved_configs = CatalogConfigResolver._resolve_credentials(
        ...     config, credentials
        ... )
        >>> print(resolved_configs)
        # {'example': {'type': 'pandas.CSVDataset', 'credentials': {'user': 'username', 'pass': 'pass'}}}
    """
    catalog = config or {}
    creds = credentials or {}

    resolved = {}
    for ds_name, entry in catalog.items():
        # The type check applies to every entry, patterns included.
        if not isinstance(entry, dict):
            raise DatasetError(
                f"Catalog entry '{ds_name}' is not a valid dataset configuration. "
                "\nHint: If this catalog entry is intended for variable interpolation, "
                "make sure that the key is preceded by an underscore."
            )
        if cls.is_pattern(ds_name):
            continue
        resolved[ds_name] = _resolve_credentials(entry, creds)

    return resolved

_resolve_dataset_config `classmethod` ¶

_resolve_dataset_config(ds_name, pattern, config)

Resolve dataset configuration based on the provided pattern.

Parameters:

ds_name (str) –

The dataset name to resolve.
pattern (str) –

The pattern to use for resolution.
config (Any) –

The dataset configuration.

Returns:

Any –

The resolved dataset configuration.

Example: ::

>>> pattern = "{namespace}.int_{name}"
>>> config = {"filepath": "{name}.csv"}
>>> resolved_config = CatalogConfigResolver._resolve_dataset_config(
...     "data.int_customers", pattern, config
... )
>>> print(resolved_config)
# {"filepath": "customers.csv"}

Source code in kedro/io/catalog_config_resolver.py

@classmethod
def _resolve_dataset_config(
    cls,
    ds_name: str,
    pattern: str,
    config: Any,
) -> Any:
    """
    Resolve dataset configuration based on the provided pattern.

    The placeholders captured by matching `pattern` against `ds_name` are
    substituted into every string of `config` that contains a closing curly
    bracket. Dictionaries and lists/tuples are walked recursively; dictionary
    keys are left as they are and tuples come back as lists. Any other value
    is returned unchanged.

    The pattern is parsed once per call, and `config` itself is never
    modified: new containers are built for the result, so a pattern template
    can safely be resolved for several dataset names.

    Args:
        ds_name: The dataset name to resolve.
        pattern: The pattern to use for resolution.
        config: The dataset configuration.

    Returns:
        The resolved dataset configuration.

    Example:
    ::

        >>> pattern = "{namespace}.int_{name}"
        >>> config = {"filepath": "{name}.csv"}
        >>> resolved_config = CatalogConfigResolver._resolve_dataset_config(
        ...     "data.int_customers", pattern, config
        ... )
        >>> print(resolved_config)
        # {"filepath": "customers.csv"}
    """
    # Parse a single time; all nested values share the same captured placeholders.
    resolved_vars = parse(pattern, ds_name)

    def _substitute(node: Any) -> Any:
        """Return a resolved copy of `node` without touching the original."""
        if isinstance(node, dict):
            return {key: _substitute(value) for key, value in node.items()}
        if isinstance(node, (list, tuple)):
            return [_substitute(value) for value in node]
        if isinstance(node, str) and "}" in node:
            # `.named` is only read when a placeholder string is actually found.
            return node.format_map(resolved_vars.named)
        return node

    return _substitute(config)

_sort_patterns `classmethod` ¶

_sort_patterns(dataset_patterns)

Sort a dictionary of dataset patterns according to parsing rules.

Patterns are sorted in the following order:

1. Decreasing specificity (number of characters outside the curly brackets)
2. Decreasing number of placeholders (number of curly bracket pairs)
3. Alphabetically

Parameters:

dataset_patterns (Patterns) –

A dictionary of dataset patterns.

Returns:

Patterns –

A sorted dictionary of dataset patterns.

Raises:

DatasetError –

If multiple catch-all patterns are found.

Example: ::

>>> patterns = {
...     "{namespace}.int_{name}{a}": {},
...     "{namespace}.{name}": {},
...     "{name}": {},
...     "{namespace}.ant_{name}{c}": {},
... }
>>> sorted_patterns = CatalogConfigResolver._sort_patterns(patterns)
>>> print(sorted_patterns.keys())
# dict_keys(['{namespace}.ant_{name}{c}', '{namespace}.int_{name}{a}', '{namespace}.{name}', '{name}'])

Source code in kedro/io/catalog_config_resolver.py

@classmethod
def _sort_patterns(cls, dataset_patterns: Patterns) -> Patterns:
    """Order dataset patterns from the most to the least specific.

    Patterns are sorted in the following order:

    1. Decreasing specificity (number of characters outside the curly brackets)
    2. Decreasing number of placeholders (number of curly bracket pairs)
    3. Alphabetically

    Args:
        dataset_patterns: A dictionary of dataset patterns.

    Returns:
        A sorted dictionary of dataset patterns.

    Raises:
        DatasetError: If multiple catch-all patterns are found.

    Example:
    ::

        >>> patterns = {
        ...     "{namespace}.int_{name}{a}": {},
        ...     "{namespace}.{name}": {},
        ...     "{name}": {},
        ...     "{namespace}.ant_{name}{c}": {},
        ... }
        >>> sorted_patterns = CatalogConfigResolver._sort_patterns(patterns)
        >>> print(sorted_patterns.keys())
        # dict_keys(['{namespace}.ant_{name}{c}', '{namespace}.int_{name}{a}', '{namespace}.{name}', '{name}'])
    """

    def _rank(name: str) -> tuple[int, int, str]:
        # Negated counts turn the ascending sort into "largest first";
        # the name itself is the alphabetical tie-breaker.
        return (-(cls._pattern_specificity(name)), -name.count("{"), name)

    ordered_names = sorted(dataset_patterns, key=_rank)

    # A specificity of zero means the name is made of placeholders only.
    catch_all = []
    for name in ordered_names:
        if cls._pattern_specificity(name) == 0:
            catch_all.append(name)
    if len(catch_all) > 1:
        raise DatasetError(
            f"Multiple catch-all patterns found in the catalog: {', '.join(catch_all)}. Only one catch-all pattern is allowed, remove the extras."
        )

    ordered_patterns = {}
    for name in ordered_names:
        ordered_patterns[name] = dataset_patterns[name]
    return ordered_patterns

_unresolve_credentials `staticmethod` ¶

_unresolve_credentials(cred_name, ds_config)

Extracts and replaces credentials in a dataset configuration with references, ensuring separation of credentials from the dataset configuration.

Credentials are searched for recursively in the dataset configuration. The first occurrence of the CREDENTIALS_KEY is replaced with a generated reference key.

Parameters:

cred_name (str) –

A unique identifier for the credentials being unresolved. This is used to generate a reference key for the credentials.
ds_config (dict[str, dict[str, Any]] | None) –

The dataset configuration containing potential credentials under the key CREDENTIALS_KEY.

Returns:

A tuple containing –

- ds_config_copy: A deep copy of the original dataset configuration with credentials replaced by reference keys.
- credentials: A dictionary mapping generated reference keys to the original credentials.

Example: ::

>>> config = {
...     "type": "pandas.CSVDataset",
...     "credentials": {"user": "username", "pass": "pass"},
... }
>>> ds_config, creds = CatalogConfigResolver._unresolve_credentials(
...     "example", config
... )
>>> print(ds_config)
# {'type': 'pandas.CSVDataset', 'credentials': 'example_credentials'}
>>> print(creds)
# {'example_credentials': {'user': 'username', 'pass': 'pass'}}

Source code in kedro/io/catalog_config_resolver.py

@staticmethod
def _unresolve_credentials(
    cred_name: str, ds_config: dict[str, dict[str, Any]] | None
) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, Any]]]:
    """
    Extracts and replaces credentials in a dataset configuration with
    references, ensuring separation of credentials from the dataset configuration.

    Credentials are searched for recursively (depth-first, in key order) in
    the dataset configuration. Only the first non-empty occurrence of the
    `CREDENTIALS_KEY` is replaced with a generated reference key; the search
    stops there, so any further occurrence is left exactly as it was instead
    of overwriting the extracted credentials.

    Args:
        cred_name: A unique identifier for the credentials being unresolved.
            This is used to generate a reference key for the credentials.
        ds_config: The dataset configuration containing potential credentials
            under the key `CREDENTIALS_KEY`. `None` is treated as an empty
            configuration. The argument itself is never modified.

    Returns:
        A tuple containing:
            ds_config_copy : A deep copy of the original dataset
                configuration with credentials replaced by reference keys.
            credentials: A dictionary mapping generated reference keys to the
                original credentials (empty when no credentials were found).

    Example:
    ::

        >>> config = {
        ...     "type": "pandas.CSVDataset",
        ...     "credentials": {"user": "username", "pass": "pass"},
        ... }
        >>> ds_config, creds = CatalogConfigResolver._unresolve_credentials(
        ...     "example", config
        ... )
        >>> print(ds_config)
        # {'type': 'pandas.CSVDataset', 'credentials': 'example_credentials'}
        >>> print(creds)
        # {'example_credentials': {'user': 'username', 'pass': 'pass'}}
    """
    ds_config_copy = copy.deepcopy(ds_config) or {}
    credentials: dict[str, Any] = {}
    credentials_ref = f"{cred_name}_{CREDENTIALS_KEY}"

    def unresolve(config: Any) -> bool:
        """Replace the first credentials entry under `config`; report whether one was found."""
        # We don't expect credentials key appears more than once within the same dataset config,
        # so the first hit ends the whole search: the flag is propagated up through
        # every recursion level instead of only leaving the innermost loop.
        for key, val in config.items():
            if key == CREDENTIALS_KEY and val:
                credentials[credentials_ref] = val
                config[key] = credentials_ref
                return True
            if isinstance(val, dict) and unresolve(val):
                return True
        return False

    unresolve(ds_config_copy)

    return ds_config_copy, credentials

_validate_pattern_config `classmethod` ¶

_validate_pattern_config(ds_name, ds_config)

Checks whether a dataset pattern configuration is valid, i.e. all keys used in the configuration are present in the dataset pattern name.

Parameters:

ds_name (str) –

Dataset pattern name.
ds_config (dict[str, Any]) –

Dataset pattern configuration.

Raises:

DatasetError –

When keys used in the configuration are not present in the dataset pattern name.

Example: ::

>>> pattern = "{namespace}.int_{name}"
>>> config = {"filepath": "{name}.csv"}
>>> CatalogConfigResolver._validate_pattern_config(pattern, config)
# No error
>>> pattern = "{namespace}.int_{name}"
>>> config = {"filepath": "{nam}.csv"}
>>> CatalogConfigResolver._validate_pattern_config(pattern, config)
# DatasetError: Incorrect dataset configuration provided. Keys used in the configuration {'{nam}'} should present in
# the dataset pattern name {namespace}.int_{name}.

Source code in kedro/io/catalog_config_resolver.py

@classmethod
def _validate_pattern_config(cls, ds_name: str, ds_config: dict[str, Any]) -> None:
    """Check that a dataset pattern configuration only uses placeholders
    that are present in the dataset pattern name.

    Strings nested anywhere inside dictionaries, lists and tuples of the
    configuration are inspected; dictionary keys are not.

    Args:
        ds_name: Dataset pattern name.
        ds_config: Dataset pattern configuration.

    Raises:
        DatasetError: when keys used in the configuration are not present in the dataset pattern name.

    Example:
    ::

        >>> pattern = "{namespace}.int_{name}"
        >>> config = {"filepath": "{name}.csv"}
        >>> CatalogConfigResolver._validate_pattern_config(pattern, config)
        # No error
        >>> pattern = "{namespace}.int_{name}"
        >>> config = {"filepath": "{nam}.csv"}
        >>> CatalogConfigResolver._validate_pattern_config(pattern, config)
        # DatasetError: Incorrect dataset configuration provided. Keys used in the configuration {'{nam}'} should present in
        # the dataset pattern name {namespace}.int_{name}.
    """
    # Matches every `{...}` occurrence, brackets included (non-greedy).
    search_regex = r"\{.*?\}"

    def _placeholder_groups(node: Any):
        """Yield, for each string found under `node`, the set of its placeholders."""
        if isinstance(node, dict):
            for child in node.values():
                yield from _placeholder_groups(child)
        elif isinstance(node, (list, tuple)):
            for child in node:
                yield from _placeholder_groups(child)
        elif isinstance(node, str) and "}" in node:
            yield set(re.findall(search_regex, node))

    name_placeholders = set(re.findall(search_regex, ds_name))
    config_placeholders = set()
    for group in _placeholder_groups(ds_config):
        config_placeholders.update(group)

    if config_placeholders - name_placeholders:
        raise DatasetError(
            f"Incorrect dataset configuration provided. "
            f"Keys used in the configuration {config_placeholders - name_placeholders} "
            f"should present in the dataset pattern name {ds_name}."
        )

is_pattern `staticmethod` ¶

is_pattern(pattern)

Check if a given string is a pattern. Assume that any name with '{' is a pattern.

Parameters:

pattern (str) –

The string to check.

Returns:

bool –

True if the string is a pattern, False otherwise.

Example: ::

>>> CatalogConfigResolver.is_pattern("{namespace}.int_{name}")
# True
>>> CatalogConfigResolver.is_pattern("example_dataset")
# False

Source code in kedro/io/catalog_config_resolver.py

@staticmethod
def is_pattern(pattern: str) -> bool:
    """
    Tell whether a name should be treated as a dataset pattern.

    The test is purely syntactic: any name containing an opening curly
    bracket '{' is assumed to be a pattern.

    Args:
        pattern: The string to check.

    Returns:
        True if the string is a pattern, False otherwise.

    Example:
    ::

        >>> CatalogConfigResolver.is_pattern("{namespace}.int_{name}")
        # True
        >>> CatalogConfigResolver.is_pattern("example_dataset")
        # False
    """
    has_opening_bracket = "{" in pattern
    return has_opening_bracket

list_patterns ¶

list_patterns()

List all patterns available in the catalog.

Returns:

list[str] –

A list of dataset patterns.

Example: ::

>>> config = {
...     "{namespace}.int_{name}": {
...         "type": "pandas.CSVDataset",
...         "filepath": "{name}.csv",
...     }
... }
>>> resolver = CatalogConfigResolver(config)
>>> print(resolver.list_patterns())
# ['{namespace}.int_{name}', '{default}']

Source code in kedro/io/catalog_config_resolver.py

def list_patterns(self) -> list[str]:
    """
    Return the names of all patterns known to the resolver.

    The list starts with the dataset patterns, followed by the user catch-all
    pattern, and ends with those runtime patterns that are not already listed.

    Returns:
        A list of dataset patterns.

    Example:
    ::

        >>> config = {
        ...     "{namespace}.int_{name}": {
        ...         "type": "pandas.CSVDataset",
        ...         "filepath": "{name}.csv",
        ...     }
        ... }
        >>> resolver = CatalogConfigResolver(config)
        >>> print(resolver.list_patterns())
        # ['{namespace}.int_{name}', '{default}']
    """
    # Patterns coming from the user's catalog configuration.
    patterns = [*self._dataset_patterns.keys(), *self._user_catch_all_pattern.keys()]

    # Runtime patterns are appended only when the name is not listed yet.
    for runtime_pattern in self._default_runtime_patterns.keys():
        if runtime_pattern in patterns:
            continue
        patterns.append(runtime_pattern)

    return patterns

match_dataset_pattern ¶

match_dataset_pattern(ds_name)

Match a dataset name against dataset patterns.

This method checks if the given dataset name matches any of the dataset patterns defined in the catalog. If a match is found, the first matching pattern is returned.

Parameters:

ds_name (str) –

The name of the dataset to match.

Returns:

str | None –

The first matching pattern, or None if no match is found.

Example: ::

>>> config = {
...     "{namespace}.int_{name}": {"type": "pandas.CSVDataset"},
...     "{name}": {"type": "MemoryDataset"},
... }
>>> resolver = CatalogConfigResolver(config=config)
>>> match = resolver.match_dataset_pattern("data.int_customers")
>>> print(match)
# {namespace}.int_{name}

Source code in kedro/io/catalog_config_resolver.py

def match_dataset_pattern(self, ds_name: str) -> str | None:
    """
    Find the first dataset pattern that a dataset name matches.

    The dataset patterns are handed to `_get_matches` in their stored order
    and the first pattern it yields is returned.

    Args:
        ds_name: The name of the dataset to match.

    Returns:
        The first matching pattern, or `None` if no match is found.

    Example:
    ::

        >>> config = {
        ...     "{namespace}.int_{name}": {"type": "pandas.CSVDataset"},
        ...     "{name}": {"type": "MemoryDataset"},
        ... }
        >>> resolver = CatalogConfigResolver(config=config)
        >>> match = resolver.match_dataset_pattern("data.int_customers")
        >>> print(match)
        # {namespace}.int_{name}
    """
    candidates = self._dataset_patterns.keys()
    for matched_pattern in self._get_matches(candidates, ds_name):
        # Only the first hit matters; the remaining candidates are never tested.
        return matched_pattern
    return None

match_runtime_pattern ¶

match_runtime_pattern(ds_name)

Match a dataset name against the default runtime pattern.

This method checks if the given dataset name matches any of the default runtime patterns. It assumes that a runtime pattern always matches.

Parameters:

ds_name (str) –

The name of the dataset to match.

Returns:

str –

The first matching runtime pattern.

Example: ::

>>> runtime_patterns = {"{default_example}": {"type": "MemoryDataset"}}
>>> resolver = CatalogConfigResolver(default_runtime_patterns=runtime_patterns)
>>> match = resolver.match_runtime_pattern("example_dataset")
>>> print(match)
# {default_example}

Source code in kedro/io/catalog_config_resolver.py

def match_runtime_pattern(self, ds_name: str) -> str:
    """
    Match a dataset name against the default runtime patterns.

    The keys of ``self._default_runtime_patterns`` are passed to
    ``self._get_matches`` in the dictionary's own (insertion) order and the
    first pattern it yields is returned. It is assumed that a runtime pattern
    always matches, so no fallback value is provided.

    Args:
        ds_name: The name of the dataset to match.

    Returns:
        The first matching runtime pattern.

    Raises:
        StopIteration: If ``_get_matches`` yields no pattern at all, i.e. the
            "runtime pattern always matches" assumption does not hold.

    Example:
    ::

        >>> runtime_patterns = {"{default_example}": {"type": "MemoryDataset"}}
        >>> resolver = CatalogConfigResolver(default_runtime_patterns=runtime_patterns)
        >>> match = resolver.match_runtime_pattern("example_dataset")
        >>> print(match)
        # {default_example}
    """
    # Hand over the keys view directly instead of wrapping it in `set()`:
    # a set has no defined order, so "first match" would depend on string
    # hashing whenever more than one runtime pattern is configured.
    matches = self._get_matches(self._default_runtime_patterns.keys(), ds_name)
    # We assume runtime pattern always matches at the end
    return next(matches)

match_user_catch_all_pattern ¶

match_user_catch_all_pattern(ds_name)

Match a dataset name against the user-defined catch-all pattern.

This method checks if the given dataset name matches any of the user-defined catch-all patterns. If a match is found, the first matching pattern is returned.

Parameters:

ds_name (str) –

The name of the dataset to match.

Returns:

str | None –

The first matching pattern, or None if no match is found.

Example: ::

>>> config = {"{name}": {"type": "MemoryDataset"}}
>>> resolver = CatalogConfigResolver(config=config)
>>> match = resolver.match_user_catch_all_pattern("example_dataset")
>>> print(match)
# {name}

Source code in kedro/io/catalog_config_resolver.py

def match_user_catch_all_pattern(self, ds_name: str) -> str | None:
    """
    Match a dataset name against the user-defined catch-all patterns.

    The keys of ``self._user_catch_all_pattern`` are passed to
    ``self._get_matches`` in the dictionary's own (insertion) order and the
    first pattern it yields is returned.

    Args:
        ds_name: The name of the dataset to match.

    Returns:
        The first matching pattern, or `None` if no match is found.

    Example:
    ::

        >>> config = {"{name}": {"type": "MemoryDataset"}}
        >>> resolver = CatalogConfigResolver(config=config)
        >>> match = resolver.match_user_catch_all_pattern("example_dataset")
        >>> print(match)
        # {name}
    """
    # Hand over the keys view directly instead of wrapping it in `set()`:
    # a set has no defined order, so "first match" would depend on string
    # hashing whenever more than one catch-all pattern is configured.
    matches = self._get_matches(self._user_catch_all_pattern.keys(), ds_name)
    return next(matches, None)

resolve_pattern ¶

resolve_pattern(ds_name)

Resolve a dataset name to its configuration based on patterns.

This method matches the dataset name against catalog patterns and resolves its configuration. If the dataset name matches a user-defined catch-all pattern, a warning is logged.

Parameters:

ds_name (str) –

The name of the dataset to resolve.

Returns:

dict[str, Any] –

The resolved dataset configuration.

Example: ::

>>> config = {
...     "{namespace}.int_{name}": {
...         "type": "pandas.CSVDataset",
...         "filepath": "{name}.csv",
...     }
... }
>>> resolver = CatalogConfigResolver(config=config)
>>> resolved_config = resolver.resolve_pattern("data.int_customers")
>>> print(resolved_config)
# {'type': 'pandas.CSVDataset', 'filepath': 'customers.csv'}

Source code in kedro/io/catalog_config_resolver.py

def resolve_pattern(self, ds_name: str) -> dict[str, Any]:
    """
    Produce the configuration for a dataset name using the known patterns.

    A name that already has an entry in ``self._resolved_configs`` gets that
    entry back unchanged. Otherwise the name is matched, in order, against the
    dataset patterns, the user-defined catch-all patterns and the default
    runtime patterns; the configuration of the matched pattern is then
    resolved for this name. A warning is logged when the matched pattern has
    specificity 0 and is one of the user-defined catch-all patterns.

    Args:
        ds_name: The name of the dataset to resolve.

    Returns:
        The resolved dataset configuration.

    Example:
    ::

        >>> config = {
        ...     "{namespace}.int_{name}": {
        ...         "type": "pandas.CSVDataset",
        ...         "filepath": "{name}.csv",
        ...     }
        ... }
        >>> resolver = CatalogConfigResolver(config=config)
        >>> resolved_config = resolver.resolve_pattern("data.int_customers")
        >>> print(resolved_config)
        # {'type': 'pandas.CSVDataset', 'filepath': 'customers.csv'}
    """
    # Names with an existing resolved configuration skip pattern matching.
    if ds_name in self._resolved_configs:
        return self._resolved_configs[ds_name]

    # Fall through the pattern sources from most to least specific source.
    pattern = self.match_dataset_pattern(ds_name)
    if not pattern:
        pattern = self.match_user_catch_all_pattern(ds_name)
    if not pattern:
        pattern = self.match_runtime_pattern(ds_name)

    # Resolve against a deep copy, presumably so the stored pattern
    # configuration is never mutated by the resolution step.
    config_template = copy.deepcopy(self._get_pattern_config(pattern))
    resolved = self._resolve_dataset_config(ds_name, pattern, config_template)

    overrides_default = (
        self._pattern_specificity(pattern) == 0
        and pattern in self._user_catch_all_pattern
    )
    if overrides_default:
        self._logger.warning(
            "Config from the dataset pattern '%s' in the catalog will be used to "
            "override the default dataset creation for '%s'",
            pattern,
            ds_name,
        )
    return resolved  # type: ignore[no-any-return]

CatalogConfigResolver

kedro.io.CatalogConfigResolver ¶

_default_runtime_patterns instance-attribute ¶

_logger property ¶

_resolved_configs instance-attribute ¶

config property ¶

_extract_patterns classmethod ¶

_get_matches classmethod ¶

_get_pattern_config ¶

_pattern_specificity staticmethod ¶

_resolve_credentials classmethod ¶

_resolve_dataset_config classmethod ¶

_sort_patterns classmethod ¶

_unresolve_credentials staticmethod ¶

_validate_pattern_config classmethod ¶

is_pattern staticmethod ¶

list_patterns ¶

match_dataset_pattern ¶

match_runtime_pattern ¶

match_user_catch_all_pattern ¶

resolve_pattern ¶

_default_runtime_patterns `instance-attribute` ¶

_logger `property` ¶

_resolved_configs `instance-attribute` ¶

config `property` ¶

_extract_patterns `classmethod` ¶

_get_matches `classmethod` ¶

_pattern_specificity `staticmethod` ¶

_resolve_credentials `classmethod` ¶

_resolve_dataset_config `classmethod` ¶

_sort_patterns `classmethod` ¶

_unresolve_credentials `staticmethod` ¶

_validate_pattern_config `classmethod` ¶

is_pattern `staticmethod` ¶