Skip to content

📋 Template System

Functions for enforcing consistent data structures and validation.

🎯 Overview

The template system provides data validation and structure enforcement for reliable deserialization.

📦 Functions

TemplateDeserializer

datason.TemplateDeserializer(template: Any, strict: bool = True, fallback_auto_detect: bool = True)

Template-based deserializer for enhanced type fidelity and round-trip scenarios.

This class allows users to provide a template object that guides the deserialization process, ensuring that the output matches the expected structure and types.

Initialize template deserializer.

Parameters:

Name Type Description Default
template Any

Template object to guide deserialization

required
strict bool

If True, raise errors when structure doesn't match

True
fallback_auto_detect bool

If True, use auto-detection when template doesn't match

True
Source code in datason/deserializers_new.py
def __init__(self, template: Any, strict: bool = True, fallback_auto_detect: bool = True):
    """Set up a deserializer that is driven by ``template``.

    Args:
        template: Object whose structure and types guide deserialization.
        strict: When True, a failed template-guided deserialization raises
            an error instead of being tolerated.
        fallback_auto_detect: When True (and ``strict`` is False), a failed
            template-guided deserialization falls back to auto-detection.
    """
    # Store the configuration before analysing: the analysis step runs on
    # the instance and is therefore performed last.
    self.template, self.strict, self.fallback_auto_detect = (
        template,
        strict,
        fallback_auto_detect,
    )
    self._template_info = self._analyze_template()

deserialize(obj: Any) -> Any

Deserialize object using template guidance.

Parameters:

Name Type Description Default
obj Any

Serialized object to deserialize

required

Returns:

Type Description
Any

Deserialized object matching template structure

Source code in datason/deserializers_new.py
def deserialize(self, obj: Any) -> Any:
    """Deserialize ``obj`` under the guidance of the configured template.

    Args:
        obj: Serialized object to deserialize.

    Returns:
        The template-guided result. If that attempt raises and ``strict`` is
        False, the result of auto-detection (when ``fallback_auto_detect``
        is True) or ``obj`` unchanged (when it is False).

    Raises:
        TemplateDeserializationError: If the template-guided attempt raises
            and ``strict`` is True. The original exception is chained.
    """
    try:
        return self._deserialize_with_template(obj, self.template)
    except Exception as exc:
        # Strict mode: surface the failure, naming the template's type.
        if self.strict:
            template_name = type(self.template).__name__
            raise TemplateDeserializationError(
                f"Failed to deserialize with template {template_name}: {exc}"
            ) from exc

        # Lenient mode without fallback: hand the input back untouched.
        if not self.fallback_auto_detect:
            return obj

        # Lenient mode with fallback: warn the caller, then auto-detect.
        warnings.warn(f"Template deserialization failed, falling back to auto-detection: {exc}", stacklevel=2)
        return auto_deserialize(obj, aggressive=True)

deserialize_with_template()

datason.deserialize_with_template(obj: Any, template: Any, **kwargs: Any) -> Any

Convenience function for template-based deserialization.

Parameters:

Name Type Description Default
obj Any

Serialized object to deserialize

required
template Any

Template object to guide deserialization

required
**kwargs Any

Additional arguments for TemplateDeserializer

{}

Returns:

Type Description
Any

Deserialized object matching template structure

Examples:

>>> import pandas as pd
>>> template_df = pd.DataFrame({'a': [1], 'b': ['text']})
>>> serialized_data = [{'a': 2, 'b': 'hello'}, {'a': 3, 'b': 'world'}]
>>> result = deserialize_with_template(serialized_data, template_df)
>>> isinstance(result, pd.DataFrame)
True
>>> result.dtypes['a']  # Should match template
dtype('int64')
Source code in datason/deserializers_new.py
def deserialize_with_template(obj: Any, template: Any, **kwargs: Any) -> Any:
    """Deserialize ``obj`` against ``template`` in a single call.

    Builds a ``TemplateDeserializer`` for ``template`` (forwarding
    ``kwargs`` to its constructor) and immediately applies it to ``obj``.

    Args:
        obj: Serialized object to deserialize.
        template: Template object to guide deserialization.
        **kwargs: Extra keyword arguments passed to ``TemplateDeserializer``.

    Returns:
        Whatever ``TemplateDeserializer.deserialize`` returns for ``obj``.

    Examples:
        >>> import pandas as pd
        >>> template_df = pd.DataFrame({'a': [1], 'b': ['text']})
        >>> serialized_data = [{'a': 2, 'b': 'hello'}, {'a': 3, 'b': 'world'}]
        >>> result = deserialize_with_template(serialized_data, template_df)
        >>> isinstance(result, pd.DataFrame)
        True
        >>> result.dtypes['a']  # Should match template
        dtype('int64')
    """
    return TemplateDeserializer(template, **kwargs).deserialize(obj)

infer_template_from_data()

datason.infer_template_from_data(data: Any, max_samples: int = 100) -> Any

Infer a template from sample data.

This function analyzes sample data to create a template that can be used for subsequent template-based deserialization.

Parameters:

Name Type Description Default
data Any

Sample data to analyze (list of records, DataFrame, etc.)

required
max_samples int

Maximum number of samples to analyze

100

Returns:

Type Description
Any

Inferred template object

Examples:

>>> sample_data = [
...     {'name': 'Alice', 'age': 30, 'date': '2023-01-01T10:00:00'},
...     {'name': 'Bob', 'age': 25, 'date': '2023-01-02T11:00:00'}
... ]
>>> template = infer_template_from_data(sample_data)
>>> # template will be a dict with expected types
Source code in datason/deserializers_new.py
def infer_template_from_data(data: Any, max_samples: int = 100) -> Any:
    """Derive a deserialization template from sample data.

    The returned object can be used as the template for later
    template-based deserialization.

    Args:
        data: Sample data to analyze (list of records, DataFrame, etc.).
        max_samples: Upper bound on how many leading list items are analyzed.

    Returns:
        * For a non-empty list: the template inferred from the first
          ``max_samples`` items.
        * For a pandas DataFrame or Series: a copy of at most its first
          row/element.
        * For anything else (dicts, empty lists, scalars, ...): ``data``
          itself, unchanged.

    Examples:
        >>> sample_data = [
        ...     {'name': 'Alice', 'age': 30, 'date': '2023-01-01T10:00:00'},
        ...     {'name': 'Bob', 'age': 25, 'date': '2023-01-02T11:00:00'}
        ... ]
        >>> template = infer_template_from_data(sample_data)
        >>> # template will be a dict with expected types
    """
    # Non-empty list of records: analyze only the leading samples.
    if isinstance(data, list) and data:
        leading_records = data[:max_samples]
        return _infer_template_from_records(leading_records)

    # pandas containers: a one-row (or empty) copy carries the structure.
    if pd is not None and isinstance(data, (pd.DataFrame, pd.Series)):
        head_len = min(1, len(data))
        return data.iloc[:head_len].copy()

    # A single dict already serves as its own template; for any other
    # input no meaningful template can be inferred, so it is returned as is.
    return data

create_ml_round_trip_template()

datason.create_ml_round_trip_template(ml_object: Any) -> Dict[str, Any]

Create a template optimized for ML object round-trip serialization.

This function creates templates specifically designed for machine learning workflows where perfect round-trip fidelity is crucial.

Parameters:

Name Type Description Default
ml_object Any

ML object (model, dataset, etc.) to create template for

required

Returns:

Type Description
Dict[str, Any]

Template dictionary with ML-specific metadata

Examples:

>>> import sklearn.linear_model
>>> model = sklearn.linear_model.LogisticRegression()
>>> template = create_ml_round_trip_template(model)
>>> # template will include model structure, parameters, etc.
Source code in datason/deserializers_new.py
def create_ml_round_trip_template(ml_object: Any) -> Dict[str, Any]:
    """Build a metadata template describing ``ml_object`` for round-tripping.

    Every template carries the ``"__ml_template__"`` marker, the object's
    type name and its ``__module__`` (``None`` when absent). Depending on the
    kind of object, structural details are added:

    * pandas DataFrame: columns, per-column dtypes, index name, shape.
    * pandas Series: dtype, name, index name, length.
    * numpy array: shape, dtype, Fortran-order flag.
    * object exposing ``get_params`` (treated as an sklearn model): its
      parameters and whether it has a ``classes_`` or ``coef_`` attribute.

    Objects matching none of these get only the base entries.

    Args:
        ml_object: ML object (model, dataset, etc.) to create template for.

    Returns:
        Template dictionary with ML-specific metadata.

    Examples:
        >>> import sklearn.linear_model
        >>> model = sklearn.linear_model.LogisticRegression()
        >>> template = create_ml_round_trip_template(model)
        >>> # template will include model structure, parameters, etc.
    """
    template: Dict[str, Any] = {
        "__ml_template__": True,
        "object_type": type(ml_object).__name__,
        "module": getattr(ml_object, "__module__", None),
    }
    pandas_available = pd is not None

    # pandas DataFrame
    if pandas_available and isinstance(ml_object, pd.DataFrame):
        template["structure_type"] = "dataframe"
        template["columns"] = list(ml_object.columns)
        template["dtypes"] = {column: str(kind) for column, kind in ml_object.dtypes.items()}
        template["index_name"] = ml_object.index.name
        template["shape"] = ml_object.shape
        return template

    # pandas Series
    if pandas_available and isinstance(ml_object, pd.Series):
        template["structure_type"] = "series"
        template["dtype"] = str(ml_object.dtype)
        template["name"] = ml_object.name
        template["index_name"] = ml_object.index.name
        template["length"] = len(ml_object)
        return template

    # numpy array
    if np is not None and isinstance(ml_object, np.ndarray):
        template["structure_type"] = "numpy_array"
        template["shape"] = ml_object.shape
        template["dtype"] = str(ml_object.dtype)
        template["fortran_order"] = np.isfortran(ml_object)
        return template

    # sklearn-style estimator (anything exposing ``get_params``)
    if hasattr(ml_object, "get_params"):
        try:
            # Compute everything before touching the template so that a
            # failure leaves it with the base entries only.
            params = ml_object.get_params()
            is_fitted = hasattr(ml_object, "classes_") or hasattr(ml_object, "coef_")
        except Exception:
            # Best effort: model details are optional, so errors are ignored.
            return template
        template["structure_type"] = "sklearn_model"
        template["parameters"] = params
        template["fitted"] = is_fitted

    return template