Skip to content

YadsSpec

YadsSpec dataclass

Canonical yads specification.

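Represents a complete table definition including columns, constraints, storage properties, partitioning, and metadata. Instances are frozen: fields cannot be reassigned after construction, although the list and dict values they hold remain mutable.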

Parameters:

Name Type Description Default
name str

Fully qualified table name (e.g., "catalog.database.table").

required
version int

Registry-assigned monotonic integer version for tracking changes.

required
yads_spec_version str

Version of the yads specification format itself.

YADS_SPEC_VERSION
columns list[Column]

List of Column objects defining the table structure.

_empty_columns()
description str | None

Optional human-readable description of the table.

None
external bool

Whether to generate CREATE EXTERNAL TABLE statements.

False
storage Storage | None

Storage configuration including format and properties.

None
partitioned_by list[TransformedColumnReference]

List of partition columns.

_empty_partitions()
table_constraints list[TableConstraint]

List of table-level constraints (e.g., composite keys).

_empty_table_constraints()
metadata dict[str, Any]

Additional metadata as key-value pairs.

_empty_metadata()

Raises:

Type Description
SpecValidationError

If the spec contains validation errors such as duplicate column names, undefined partition columns, or invalid constraint references.

Source code in src/yads/spec.py
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
@dataclass(frozen=True)
class YadsSpec:
    """Canonical yads specification.

    Describes a single table: its columns, table-level constraints, storage
    properties, partitioning, and free-form metadata. The dataclass is frozen
    and every instance is validated right after construction.

    Args:
        name: Fully qualified table name (e.g., "catalog.database.table").
        version: Registry-assigned monotonic integer version for tracking changes.
        yads_spec_version: Version of the yads specification format itself.
        columns: List of Column objects defining the table structure.
        description: Optional human-readable description of the table.
        external: Whether to generate `CREATE EXTERNAL TABLE` statements.
        storage: Storage configuration including format and properties.
        partitioned_by: List of partition columns.
        table_constraints: List of table-level constraints (e.g., composite keys).
        metadata: Additional metadata as key-value pairs.

    Raises:
        SpecValidationError: If two columns share a name, or if a partition
            column, a generated column's source column, or a column named by
            a table constraint is not defined in `columns`.
    """

    name: str
    version: int
    yads_spec_version: str = YADS_SPEC_VERSION
    columns: list[Column] = field(default_factory=_empty_columns)
    description: str | None = None
    external: bool = False
    storage: Storage | None = None
    partitioned_by: list[TransformedColumnReference] = field(
        default_factory=_empty_partitions
    )
    table_constraints: list[TableConstraint] = field(
        default_factory=_empty_table_constraints
    )
    metadata: dict[str, Any] = field(default_factory=_empty_metadata)

    def __post_init__(self):
        """Run every structural check; the first failing check raises."""
        # The sequence decides which error surfaces when several apply.
        for run_check in (
            self._validate_columns,
            self._validate_partitions,
            self._validate_generated_columns,
            self._validate_table_constraints,
        ):
            run_check()

    def _validate_columns(self):
        """Reject specs in which two columns share the same name."""
        seen: set[str] = set()
        for column in self.columns:
            if column.name in seen:
                raise SpecValidationError(
                    f"Duplicate column name found: {column.name!r}."
                )
            seen.add(column.name)

    def _validate_partitions(self):
        """Reject partition references to columns that are not in the spec."""
        for partition_column in self.partition_column_names:
            if partition_column in self.column_names:
                continue
            raise SpecValidationError(
                f"Partition column {partition_column!r} must be defined "
                "as a column in the schema."
            )

    def _validate_generated_columns(self):
        """Reject generated columns whose source column is not in the spec."""
        for generated_name, source_name in self.generated_columns.items():
            if source_name in self.column_names:
                continue
            raise SpecValidationError(
                f"Source column {source_name!r} for generated column "
                f"{generated_name!r} not found in schema."
            )

    def _validate_table_constraints(self):
        """Reject table constraints that name columns missing from the spec."""
        for table_constraint in self.table_constraints:
            for referenced in table_constraint.constrained_columns:
                if referenced in self.column_names:
                    continue
                raise SpecValidationError(
                    f"Column {referenced!r} in constraint {table_constraint} "
                    "not found in schema."
                )

    def to_dict(self) -> dict[str, Any]:
        """Serialize this spec into the canonical dictionary format."""
        # Imported here rather than at module level, as in the original code.
        from .serializers.spec_serializer import SpecSerializer

        serializer = SpecSerializer()
        return serializer.serialize(self)

    @cached_property
    def column_names(self) -> set[str]:
        """Names of every column defined in the spec."""
        return {column.name for column in self.columns}

    @cached_property
    def partition_column_names(self) -> set[str]:
        """Names of the columns referenced by `partitioned_by`."""
        return {reference.column for reference in self.partitioned_by}

    @cached_property
    def generated_columns(self) -> dict[str, str]:
        """Generated columns keyed by name, mapped to their source column:
        `{generated_column_name: source_column_name}`.
        """
        sources: dict[str, str] = {}
        for column in self.columns:
            generated_as = column.generated_as
            if generated_as is not None:
                sources[column.name] = generated_as.column
        return sources

    @cached_property
    def nullable_columns(self) -> set[str]:
        """Names of the columns that allow NULL values."""
        return {column.name for column in self.columns if column.is_nullable}

    @cached_property
    def constrained_columns(self) -> set[str]:
        """Names of the columns that carry at least one constraint."""
        return {column.name for column in self.columns if column.has_constraints}

    def _build_header_str(self) -> str:
        """Return the first line of the textual form: name plus version."""
        return f"spec {self.name}(version={self.version!r})"

    def _build_body_str(self) -> str:
        """Return the newline-separated body of the textual form.

        Optional sections appear only when their value is truthy; the
        `columns=[...]` section is always emitted last.
        """
        sections: list[str] = []
        if self.description:
            sections.append(f"description={self.description!r}")
        if self.metadata:
            metadata_repr = _format_dict_as_kwargs(self.metadata, multiline=True)
            sections.append(f"metadata={metadata_repr}")
        if self.external:
            sections.append("external=True")
        if self.storage:
            sections.append(f"storage={self.storage}")
        if self.partitioned_by:
            partitions = ", ".join(str(ref) for ref in self.partitioned_by)
            sections.append(f"partitioned_by=[{partitions}]")
        if self.table_constraints:
            constraint_lines = textwrap.indent(
                "\n".join(str(item) for item in self.table_constraints), "  "
            )
            sections.append(f"table_constraints=[\n{constraint_lines}\n]")

        # Columns are rendered unconditionally, even for an empty list.
        column_lines = textwrap.indent("\n".join(map(str, self.columns)), "  ")
        sections.append(f"columns=[\n{column_lines}\n]")
        return "\n".join(sections)

    def __str__(self) -> str:
        header = self._build_header_str()
        indented_body = textwrap.indent(self._build_body_str(), "  ")
        return f"{header}(\n{indented_body}\n)"

column_names cached property

Set of all column names defined in the spec.

constrained_columns cached property

Set of column names that have any constraints defined.

generated_columns cached property

Mapping of generated column names to their source columns with format: {generated_column_name: source_column_name}.

nullable_columns cached property

Set of column names that allow NULL values.

partition_column_names cached property

Set of column names referenced as partition columns.

to_dict()

Serialize this spec into the canonical dictionary format.

Source code in src/yads/spec.py
297
298
299
300
301
def to_dict(self) -> dict[str, Any]:
    """Serialize this spec into the canonical dictionary format."""
    # Imported here rather than at module level, as in the original code.
    from .serializers.spec_serializer import SpecSerializer

    serializer = SpecSerializer()
    return serializer.serialize(self)

Field dataclass

A named, typed data field with optional constraints.

Parameters:

Name Type Description Default
name str

Field identifier.

required
type YadsType

Logical yads type of the field.

required
description str | None

Optional human-friendly description.

None
metadata dict[str, Any]

Arbitrary key-value metadata for consumers.

_empty_metadata()
constraints list[ColumnConstraint]

Column-level constraints such as nullability or checks.

_empty_constraints()
Source code in src/yads/spec.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
@dataclass(frozen=True)
class Field:
    """A named, typed data field with optional constraints.

    Args:
        name: Field identifier.
        type: Logical yads type of the field.
        description: Optional human-friendly description.
        metadata: Arbitrary key-value metadata for consumers.
        constraints: Column-level constraints such as nullability or checks.
    """

    name: str
    type: YadsType
    description: str | None = None
    metadata: dict[str, Any] = field(default_factory=_empty_metadata)
    constraints: list[ColumnConstraint] = field(default_factory=_empty_constraints)

    @cached_property
    def has_metadata(self) -> bool:
        """Whether any metadata entry is present on the field."""
        return bool(self.metadata)

    @cached_property
    def is_nullable(self) -> bool:
        """Whether the field allows NULL, i.e. has no `NotNullConstraint`."""
        for constraint in self.constraints:
            if isinstance(constraint, NotNullConstraint):
                return False
        return True

    @cached_property
    def has_constraints(self) -> bool:
        """Whether at least one constraint is attached to the field."""
        return bool(self.constraints)

    @cached_property
    def constraint_types(self) -> set[Type[ColumnConstraint]]:
        """The distinct classes of the constraints attached to the field."""
        return set(map(type, self.constraints))

    def _build_details_repr(self) -> str:
        """Return the parenthesised detail block used by `__str__`.

        Only truthy attributes are listed, in the order description,
        constraints, metadata. An empty string is returned when none is set.
        """
        entries: list[str] = []
        if self.description:
            entries.append(f"description={self.description!r}")
        if self.constraints:
            joined = ", ".join(str(item) for item in self.constraints)
            entries.append(f"constraints=[{joined}]")
        if self.metadata:
            entries.append(f"metadata={_format_dict_as_kwargs(self.metadata)}")

        if entries:
            block = textwrap.indent(",\n".join(entries), "  ")
            return f"(\n{block}\n)"
        return ""

    def __str__(self) -> str:
        suffix = self._build_details_repr()
        return f"{self.name}: {self.type}{suffix}"

constraint_types cached property

Set of constraint types applied to this field.

has_constraints cached property

True if this field has any constraints defined.

has_metadata cached property

True if the field has any metadata defined.

is_nullable cached property

True if this field allows NULL values.

Column dataclass

Bases: Field

Table column extending Field with generation support.

Parameters:

Name Type Description Default
name str

Column name.

required
type YadsType

Logical yads type of the column.

required
description str | None

Optional human-friendly description.

None
metadata dict[str, Any]

Arbitrary key-value metadata for consumers.

_empty_metadata()
constraints list[ColumnConstraint]

Column-level constraints such as nullability or checks.

_empty_constraints()
generated_as TransformedColumnReference | None

Optional expression defining a generated/computed column.

None
Source code in src/yads/spec.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
@dataclass(frozen=True)
class Column(Field):
    """Table column extending `Field` with generation support.

    Args:
        name: Column name.
        type: Logical yads type of the column.
        description: Optional human-friendly description.
        metadata: Arbitrary key-value metadata for consumers.
        constraints: Column-level constraints such as nullability or checks.
        generated_as: Optional expression defining a generated/computed column.
    """

    generated_as: TransformedColumnReference | None = None

    @cached_property
    def is_generated(self) -> bool:
        """Whether `generated_as` is set, marking the column as computed."""
        return self.generated_as is not None

    def _build_details_repr(self) -> str:
        """Return the parenthesised detail block used by `__str__`.

        Lists the truthy attributes in the order description, constraints,
        metadata, generated_as. Returns an empty string when none is set.
        """
        entries: list[str] = []
        if self.description:
            entries.append(f"description={self.description!r}")
        if self.constraints:
            joined = ", ".join(str(item) for item in self.constraints)
            entries.append(f"constraints=[{joined}]")
        if self.metadata:
            entries.append(f"metadata={_format_dict_as_kwargs(self.metadata)}")
        if self.generated_as:
            entries.append(f"generated_as={self.generated_as}")

        if entries:
            block = textwrap.indent(",\n".join(entries), "  ")
            return f"(\n{block}\n)"
        return ""

constraint_types cached property

Set of constraint types applied to this field.

has_constraints cached property

True if this field has any constraints defined.

has_metadata cached property

True if the field has any metadata defined.

is_generated cached property

True if this column is a generated/computed column.

is_nullable cached property

True if this field allows NULL values.

TransformedColumnReference dataclass

Reference to a column with an optional transformation.

Parameters:

Name Type Description Default
column str

Name of the referenced column.

required
transform str | None

Transformation function applied to the column, if any.

None
transform_args list[Any]

Arguments passed to the transformation.

_empty_any_list()
Source code in src/yads/spec.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
@dataclass(frozen=True)
class TransformedColumnReference:
    """Reference to a column with an optional transformation.

    Args:
        column: Name of the referenced column.
        transform: Transformation function applied to the column, if any.
        transform_args: Arguments passed to the transformation.
    """

    column: str
    transform: str | None = None
    transform_args: list[Any] = field(default_factory=_empty_any_list)

    def __str__(self) -> str:
        """Render as `column`, `transform(column)` or `transform(column, args...)`."""
        if not self.transform:
            return self.column

        # The column always comes first; extra arguments follow when present.
        call_parts = [f"{self.column}"]
        if self.transform_args:
            call_parts.extend(str(argument) for argument in self.transform_args)
        return f"{self.transform}({', '.join(call_parts)})"

Storage dataclass

Physical storage properties for a table.

Parameters:

Name Type Description Default
format str | None

Storage format (e.g., "parquet", "delta").

None
location str | None

Optional URI/path to the stored data.

None
tbl_properties dict[str, str]

Format-specific storage properties.

_empty_tbl_properties()
Source code in src/yads/spec.py
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
@dataclass(frozen=True)
class Storage:
    """Physical storage properties for a table.

    Args:
        format: Storage format (e.g., "parquet", "delta").
        location: Optional URI/path to the stored data.
        tbl_properties: Format-specific storage properties.
    """

    format: str | None = None
    location: str | None = None
    tbl_properties: dict[str, str] = field(default_factory=_empty_tbl_properties)

    def __str__(self) -> str:
        """Render the truthy settings as an indented `Storage(...)` block."""
        scalar_settings = (("format", self.format), ("location", self.location))
        entries = [f"{label}={value!r}" for label, value in scalar_settings if value]
        if self.tbl_properties:
            properties_repr = _format_dict_as_kwargs(
                self.tbl_properties, multiline=True
            )
            entries.append(f"tbl_properties={properties_repr}")

        body = textwrap.indent(",\n".join(entries), "  ")
        return f"Storage(\n{body}\n)"

from_dict(data)

Build a YadsSpec from a normalized dictionary.

Parameters:

Name Type Description Default
data Mapping[str, Any]

Canonical spec dictionary to deserialize.

required

Returns:

Type Description
YadsSpec

A fully validated YadsSpec instance.

Source code in src/yads/spec.py
18
19
20
21
22
23
24
25
26
27
28
29
def from_dict(data: Mapping[str, Any]) -> YadsSpec:
    """Build a `YadsSpec` from a normalized dictionary.

    Args:
        data: Canonical spec dictionary to deserialize.

    Returns:
        The `YadsSpec` produced by `SpecDeserializer.deserialize`.
    """
    # Imported here rather than at module level, as in the original code.
    from .serializers.spec_serializer import SpecDeserializer

    deserializer = SpecDeserializer()
    return deserializer.deserialize(data)