Skip to content

topmark.processors.instances

topmark / processors / instances

Declarative built-in processor definitions and binding-derived base registries.

This module centralizes TopMark's built-in processor binding declarations and provides helpers that materialize those declarations into base registries for bindings, processor definitions, and legacy bound processor instances.

get_builtin_processor_bindings

get_builtin_processor_bindings()

Return the explicit built-in processor bindings in declaration order.

Returns:

Type Description
tuple[ProcessorBinding, ...]

Tuple of built-in ProcessorBinding declarations.

Source code in src/topmark/processors/instances.py
def get_builtin_processor_bindings() -> tuple[ProcessorBinding, ...]:
    """Expose the explicit built-in processor binding declarations.

    The module-level `_BUILTIN_PROCESSOR_BINDINGS` tuple is handed back as-is:
    no copy is made and the element order is left untouched.

    Returns:
        The built-in
        [`ProcessorBinding`][topmark.processors.bindings.ProcessorBinding]
        declarations, in the order in which they are stored.
    """
    builtin_bindings: tuple[ProcessorBinding, ...] = _BUILTIN_PROCESSOR_BINDINGS
    return builtin_bindings

get_base_processor_binding_registry cached

get_base_processor_binding_registry()

Build and cache the base binding registry from explicit declarations.

The returned mapping is keyed by file type qualified key and stores the bound processor qualified key as its value.

Returns:

Type Description
dict[str, str]

Base mapping of file type qualified key to processor qualified key.

Raises:

Type Description
ProcessorBindingError

If a binding references an unknown file type or if multiple bindings target the same file type qualified key.

Source code in src/topmark/processors/instances.py
@lru_cache(maxsize=1)
def get_base_processor_binding_registry() -> dict[str, str]:
    """Derive the base file-type-to-processor binding map (computed once).

    Every built-in binding is resolved to a file type through its local file
    type name. The resulting entry maps that file type's qualified key to the
    qualified key of the bound processor (built from the binding namespace and
    the processor class's local key).

    Because of `lru_cache`, a successful result is reused on later calls, so
    all callers receive the very same `dict` object.

    Returns:
        Mapping of file type qualified key to processor qualified key.

    Raises:
        ProcessorBindingError: When a binding names a file type that is not
            present in the base file type registry, or when two bindings end
            up targeting the same file type qualified key.
    """
    # Function-scope import, kept where the original had it.
    from topmark.filetypes.instances import get_base_file_type_registry

    known_file_types: dict[str, FileType] = _resolve_filetypes_by_local_key(
        get_base_file_type_registry()
    )
    bindings_by_file_type: dict[str, str] = {}

    for declared in get_builtin_processor_bindings():
        local_name: str = declared.file_type_name
        resolved: FileType | None = known_file_types.get(local_name)
        if resolved is None:
            unknown_msg = f"Unknown file type in processor binding: {local_name}"
            raise ProcessorBindingError(message=unknown_msg, file_type=local_name)

        ft_key: str = resolved.qualified_key
        # The processor key is computed before the duplicate check so that the
        # order of operations matches the established behavior.
        proc_key: str = make_qualified_key(
            namespace=declared.namespace,
            local_key=declared.processor_class.local_key,
        )

        if ft_key in bindings_by_file_type:
            duplicate_msg = f"Duplicate processor binding for file type qualified key: {ft_key}"
            raise ProcessorBindingError(message=duplicate_msg, file_type=ft_key)

        bindings_by_file_type[ft_key] = proc_key

    return bindings_by_file_type

get_base_processor_definition_registry cached

get_base_processor_definition_registry()

Build and cache the base processor-definition registry.

The returned mapping is keyed by processor qualified key. Values are processor definitions derived directly from the explicit built-in processor bindings.

Returns:

Type Description
dict[str, ProcessorDefinition]

Base mapping of processor qualified key to ProcessorDefinition.

Raises:

Type Description
ProcessorBindingError

If multiple built-in bindings resolve to the same processor qualified key but reference different processor classes.

Source code in src/topmark/processors/instances.py
@lru_cache(maxsize=1)
def get_base_processor_definition_registry() -> dict[str, ProcessorDefinition]:
    """Derive the base processor-definition map (computed once).

    One `ProcessorDefinition` is created per built-in binding, using the
    binding namespace, the processor class's local key and the processor class
    itself. Definitions are stored under their qualified key; when several
    bindings share a key *and* the same processor class, the most recently
    created definition replaces the earlier one.

    Because of `lru_cache`, a successful result is reused on later calls, so
    all callers receive the very same `dict` object.

    Returns:
        Mapping of processor qualified key to `ProcessorDefinition`.

    Raises:
        ProcessorBindingError: When two built-in bindings yield the same
            processor qualified key while pointing at different processor
            classes.
    """
    definitions: dict[str, ProcessorDefinition] = {}

    for declared in get_builtin_processor_bindings():
        candidate = ProcessorDefinition(
            namespace=declared.namespace,
            local_key=declared.processor_class.local_key,
            processor_class=declared.processor_class,
        )
        key: str = candidate.qualified_key

        previous: ProcessorDefinition | None = definitions.get(key)
        # Same key is tolerated only when it refers to the identical class object.
        conflicting: bool = (
            previous is not None
            and previous.processor_class is not candidate.processor_class
        )
        if conflicting:
            conflict_msg = f"Duplicate processor definition for qualified key: {key}"
            raise ProcessorBindingError(message=conflict_msg, file_type=key)

        definitions[key] = candidate

    return definitions

get_base_header_processor_registry cached

get_base_header_processor_registry()

Build and cache the legacy base processor registry of bound instances.

Notes

This helper exists for compatibility with older resolution paths that still expect ready-to-use bound processor instances. New registry code should prefer get_base_processor_definition_registry() together with get_base_processor_binding_registry().

Returns:

Type Description
dict[str, HeaderProcessor]

Base mapping of file type local key to bound HeaderProcessor instances.

Raises:

Type Description
ProcessorBindingError

If a binding references an unknown file type or if multiple bindings target the same file type local key.

Source code in src/topmark/processors/instances.py
@lru_cache(maxsize=1)
def get_base_header_processor_registry() -> dict[str, HeaderProcessor]:
    """Derive the legacy base registry of bound processor instances (computed once).

    Notes:
        Kept for older resolution paths that still expect ready-to-use bound
        processor instances. New registry code should prefer
        `get_base_processor_definition_registry()` combined with
        `get_base_processor_binding_registry()`.

        Because of `lru_cache`, a successful result is reused on later calls,
        so all callers receive the very same `dict` object.

    Returns:
        Mapping of file type local key to the `HeaderProcessor` produced by
        `_build_processor` for the binding and its resolved file type.

    Raises:
        ProcessorBindingError: When a binding names a file type that is not
            present in the base file type registry, or when two bindings
            target the same file type local key.
    """
    # Function-scope import, kept where the original had it.
    from topmark.filetypes.instances import get_base_file_type_registry

    known_file_types: dict[str, FileType] = _resolve_filetypes_by_local_key(
        get_base_file_type_registry()
    )
    processors_by_name: dict[str, HeaderProcessor] = {}

    for declared in get_builtin_processor_bindings():
        local_name: str = declared.file_type_name
        resolved: FileType | None = known_file_types.get(local_name)

        if resolved is None:
            unknown_msg = f"Unknown file type in processor binding: {local_name}"
            raise ProcessorBindingError(message=unknown_msg, file_type=local_name)

        if local_name in processors_by_name:
            duplicate_msg = f"Duplicate processor binding for file type: {local_name}"
            raise ProcessorBindingError(message=duplicate_msg, file_type=local_name)

        processors_by_name[local_name] = _build_processor(declared, resolved)

    return processors_by_name