
Converters

Convert a canonical YadsSpec to runtime schema objects for multiple formats.

import yads

spec = yads.from_yaml("docs/src/specs/submissions.yaml")
schema = yads.to_pyarrow(spec)
print(schema)
submission_id: int64 not null
completion_percent: decimal128(5, 2)
time_taken_seconds: int32
submitted_at: timestamp[ns, tz=UTC]

The following sections outline the high-level entry-point functions available in yads. Refer to the dedicated converter documentation for the complete API reference and additional customization options.

| Target | Helper | Converter | Install (uv) | Install (pip) |
| --- | --- | --- | --- | --- |
| PyArrow | yads.to_pyarrow | yads.converters.PyArrowConverter | uv add 'yads[pyarrow]' | pip install 'yads[pyarrow]' |
| Pydantic | yads.to_pydantic | yads.converters.PydanticConverter | uv add 'yads[pydantic]' | pip install 'yads[pydantic]' |
| Polars | yads.to_polars | yads.converters.PolarsConverter | uv add 'yads[polars]' | pip install 'yads[polars]' |
| PySpark | yads.to_pyspark | yads.converters.PySparkConverter | uv add 'yads[pyspark]' | pip install 'yads[pyspark]' |
| Spark SQL | yads.to_sql(dialect="spark") | yads.converters.sql.SparkSQLConverter | uv add 'yads[sql]' | pip install 'yads[sql]' |
| DuckDB SQL | yads.to_sql(dialect="duckdb") | yads.converters.sql.DuckdbSQLConverter | uv add 'yads[sql]' | pip install 'yads[sql]' |
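Each helper accepts the same YadsSpec and returns the target library's native schema object, as detailed under Wrapper helpers below. A minimal sketch, assuming the relevant extras are installed (outputs omitted):

import yads

spec = yads.from_yaml("docs/src/specs/submissions.yaml")

arrow_schema = yads.to_pyarrow(spec)              # pyarrow.Schema
polars_schema = yads.to_polars(spec)              # polars.Schema
spark_struct = yads.to_pyspark(spec)              # pyspark.sql.types.StructType
duckdb_ddl = yads.to_sql(spec, dialect="duckdb")  # CREATE TABLE DDL string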

Shared customization options

All converters expose the following arguments for shaping output:

Columns scope

Restrict conversion to only the columns you need for a given consumer.

import yads

spec = yads.from_yaml("docs/src/specs/submissions.yaml")
print(spec)
spec prod.assessments.submissions(version=1)(
  columns=[
    submission_id: integer(bits=64)(
      constraints=[PrimaryKeyConstraint(), NotNullConstraint()]
    )
    completion_percent: decimal(precision=5, scale=2)(
      constraints=[DefaultConstraint(value=0.0)]
    )
    time_taken_seconds: integer(bits=32)
    submitted_at: timestamptz(unit=ns, tz=UTC)
  ]
)
schema = yads.to_pyarrow(
    spec,
    include_columns={"submission_id", "submitted_at"},
)
print(schema)
submission_id: int64 not null
submitted_at: timestamp[ns, tz=UTC]
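The inverse option, ignore_columns, drops the named columns and keeps everything else. A minimal sketch reusing the spec above (the result is the full schema shown earlier minus the excluded column):

schema = yads.to_pyarrow(
    spec,
    ignore_columns={"completion_percent"},
)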

Conversion mode

Switch between strict (mode="raise") and permissive (mode="coerce") runs per call. Here, a Variant column fails in raise mode.

import pyarrow as pa
import yads
import yads.types as ytypes
from yads.spec import Column
from dataclasses import replace

spec = yads.from_yaml("docs/src/specs/submissions.yaml")
spec_with_variant = replace(
    spec,
    columns=[*spec.columns, Column(name="payload", type=ytypes.Variant())],
)

try:
    yads.to_pyarrow(
        spec_with_variant,
        mode="raise",
        fallback_type=pa.string(),
    )
except Exception as exc:
    print(type(exc).__name__ + ": " + str(exc))
UnsupportedFeatureError: PyArrowConverter does not support type: variant for 'payload'.

In coerce mode, the same column is instead coerced to the fallback_type (here pa.string(), which is also the built-in default).

schema = yads.to_pyarrow(
    spec_with_variant,
    mode="coerce",
    fallback_type=pa.string(),
)
print(schema.field("payload"))
pyarrow.Field<payload: string>

Fallback type

Provide a fallback Arrow type to keep converting unsupported logical types when running in coerce mode.

schema = yads.to_pyarrow(
    spec_with_variant,
    mode="coerce",
    fallback_type=pa.large_binary(),
)
print(schema.field("payload"))
pyarrow.Field<payload: large_binary>

Column overrides

Customize individual fields for downstream constraints or metadata.

def submitted_at_override(field, conv):
    return pa.field(
        field.name,
        pa.date32(),
        nullable=False,
        metadata={"description": "Replaced to pa.date32"},
    )

schema = yads.to_pyarrow(
    spec,
    column_overrides={"submitted_at": submitted_at_override},
)
print(schema)
submission_id: int64 not null
completion_percent: decimal128(5, 2)
time_taken_seconds: int32
submitted_at: date32[day] not null
  -- field metadata --
  description: 'Replaced to pa.date32'

Wrapper helpers

to_pyarrow(spec, *, mode='coerce', ignore_columns=None, include_columns=None, column_overrides=None, use_large_string=False, use_large_binary=False, use_large_list=False, fallback_type=None)

Convert a YadsSpec to a pyarrow.Schema.

Parameters:

- spec (YadsSpec, required): The validated yads specification to convert.
- mode (Literal['raise', 'coerce'], default 'coerce'): Conversion mode. "raise" raises on unsupported features; "coerce" adjusts with warnings.
- ignore_columns (set[str] | None, default None): Columns to exclude from conversion.
- include_columns (set[str] | None, default None): If provided, only these columns are included.
- column_overrides (Mapping[str, PyArrowColumnOverride] | None, default None): Per-column custom conversion callables.
- use_large_string (bool, default False): Use pa.large_string() for string columns.
- use_large_binary (bool, default False): Use pa.large_binary() when binary has no fixed length.
- use_large_list (bool, default False): Use pa.large_list(element) for variable-size arrays.
- fallback_type (Any | None, default None): Fallback Arrow type used in coerce mode for unsupported types. When set, overrides the default built-in pa.string().

Returns:

- Any: A pyarrow.Schema instance.

Source code in src/yads/converters/__init__.py
@requires_dependency("pyarrow", min_version="15.0.0", import_name="pyarrow")
def to_pyarrow(
    spec: YadsSpec,
    *,
    # BaseConverterConfig options
    mode: Literal["raise", "coerce"] = "coerce",
    ignore_columns: set[str] | None = None,
    include_columns: set[str] | None = None,
    column_overrides: Mapping[str, PyArrowColumnOverride] | None = None,
    # PyArrowConverterConfig options
    use_large_string: bool = False,
    use_large_binary: bool = False,
    use_large_list: bool = False,
    fallback_type: Any | None = None,
) -> Any:
    """Convert a `YadsSpec` to a `pyarrow.Schema`.

    Args:
        spec: The validated yads specification to convert.
        mode: Conversion mode. "raise" raises on unsupported features;
            "coerce" adjusts with warnings. Defaults to "coerce".
        ignore_columns: Columns to exclude from conversion.
        include_columns: If provided, only these columns are included.
        column_overrides: Per-column custom conversion callables.
        use_large_string: Use `pa.large_string()` for string columns.
        use_large_binary: Use `pa.large_binary()` when binary has no fixed length.
        use_large_list: Use `pa.large_list(element)` for variable-size arrays.
        fallback_type: Fallback Arrow type used in coerce mode for unsupported types.
            When set, overrides the default built-in `pa.string()`. Defaults to None.

    Returns:
        A `pyarrow.Schema` instance.
    """
    from . import pyarrow_converter

    config = pyarrow_converter.PyArrowConverterConfig(
        mode=mode,
        ignore_columns=frozenset(ignore_columns) if ignore_columns else frozenset[str](),
        include_columns=frozenset(include_columns) if include_columns else None,
        column_overrides=cast(
            Mapping[str, PyArrowColumnOverride], column_overrides or {}
        ),
        use_large_string=use_large_string,
        use_large_binary=use_large_binary,
        use_large_list=use_large_list,
        fallback_type=fallback_type,
    )
    return pyarrow_converter.PyArrowConverter(config).convert(spec)
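The wrapper is a thin facade over the converter class listed in the table above. A minimal sketch of the class-based path, mirroring what the wrapper does internally (import paths taken from the wrapper source; the explicit config values are the same ones the facade passes):

import yads
from yads.converters.pyarrow_converter import (
    PyArrowConverter,
    PyArrowConverterConfig,
)

spec = yads.from_yaml("docs/src/specs/submissions.yaml")

# Build the config exactly as the to_pyarrow facade does, then run the converter.
config = PyArrowConverterConfig(
    mode="coerce",
    ignore_columns=frozenset(),
    include_columns=None,
    column_overrides={},
    use_large_string=False,
    use_large_binary=False,
    use_large_list=False,
    fallback_type=None,
)
schema = PyArrowConverter(config).convert(spec)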

to_pydantic(spec, *, mode='coerce', ignore_columns=None, include_columns=None, column_overrides=None, model_name=None, model_config=None, fallback_type=None)

Convert a YadsSpec to a Pydantic BaseModel subclass.

Parameters:

- spec (YadsSpec, required): The validated yads specification to convert.
- mode (Literal['raise', 'coerce'], default 'coerce'): Conversion mode. "raise" raises on unsupported features; "coerce" adjusts with warnings.
- ignore_columns (set[str] | None, default None): Columns to exclude from conversion.
- include_columns (set[str] | None, default None): If provided, only these columns are included.
- column_overrides (Mapping[str, PydanticColumnOverride] | None, default None): Per-column custom conversion callables.
- model_name (str | None, default None): Custom name for the generated model. When not set, the spec name is used as spec.name.replace(".", "_").
- model_config (dict[str, Any] | None, default None): Optional Pydantic model configuration dict. See more at https://docs.pydantic.dev/2.0/usage/model_config/
- fallback_type (type[str] | type[dict[Any, Any]] | type[bytes] | None, default None): Fallback Python type used in coerce mode for unsupported types. When set, overrides the default built-in str.

Returns:

- type[BaseModel]: A dynamically generated Pydantic model class.

Source code in src/yads/converters/__init__.py
@requires_dependency("pydantic", min_version="2.0.0", import_name="pydantic")
def to_pydantic(
    spec: YadsSpec,
    *,
    # BaseConverterConfig options
    mode: Literal["raise", "coerce"] = "coerce",
    ignore_columns: set[str] | None = None,
    include_columns: set[str] | None = None,
    column_overrides: Mapping[str, PydanticColumnOverride] | None = None,
    # PydanticConverterConfig options
    model_name: str | None = None,
    model_config: dict[str, Any] | None = None,
    fallback_type: type[str] | type[dict[Any, Any]] | type[bytes] | None = None,
) -> type[BaseModel]:
    """Convert a `YadsSpec` to a Pydantic `BaseModel` subclass.

    Args:
        spec: The validated yads specification to convert.
        mode: Conversion mode. "raise" raises on unsupported features;
            "coerce" adjusts with warnings. Defaults to "coerce".
        ignore_columns: Columns to exclude from conversion.
        include_columns: If provided, only these columns are included.
        column_overrides: Per-column custom conversion callables.
        model_name: Custom name for the generated model. When not set, the spec name is
            used as `spec.name.replace(".", "_")`. Defaults to None.
        model_config: Optional Pydantic model configuration dict. See more at
            https://docs.pydantic.dev/2.0/usage/model_config/
        fallback_type: Fallback Python type used in coerce mode for unsupported types.
            When set, overrides the default built-in `str`. Defaults to None.

    Returns:
        A dynamically generated Pydantic model class.
    """
    config = PydanticConverterConfig(
        mode=mode,
        ignore_columns=frozenset(ignore_columns) if ignore_columns else frozenset[str](),
        include_columns=frozenset(include_columns) if include_columns else None,
        column_overrides=cast(
            Mapping[str, PydanticColumnOverride], column_overrides or {}
        ),
        model_name=model_name,
        model_config=model_config,
        fallback_type=fallback_type,
    )
    return PydanticConverter(config).convert(spec)
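A minimal usage sketch for the Pydantic helper; the model_name below is illustrative (when omitted, the model is named spec.name.replace(".", "_"), i.e. prod_assessments_submissions for the spec above):

import yads

spec = yads.from_yaml("docs/src/specs/submissions.yaml")

# "Submission" is an illustrative name chosen for this sketch.
Submission = yads.to_pydantic(spec, model_name="Submission")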

to_polars(spec, *, mode='coerce', ignore_columns=None, include_columns=None, column_overrides=None, fallback_type=None)

Convert a YadsSpec to a polars.Schema.

Parameters:

- spec (YadsSpec, required): The validated yads specification to convert.
- mode (Literal['raise', 'coerce'], default 'coerce'): Conversion mode. "raise" raises on unsupported features; "coerce" adjusts with warnings.
- ignore_columns (set[str] | None, default None): Columns to exclude from conversion.
- include_columns (set[str] | None, default None): If provided, only these columns are included.
- column_overrides (Mapping[str, PolarsColumnOverride] | None, default None): Per-column custom conversion callables.
- fallback_type (DataType | None, default None): Fallback Polars data type used in coerce mode for unsupported types. When set, overrides the default built-in pl.String.

Returns:

- Schema: A polars.Schema instance.

Source code in src/yads/converters/__init__.py
@requires_dependency("polars", min_version="1.0.0", import_name="polars")
def to_polars(
    spec: YadsSpec,
    *,
    # BaseConverterConfig options
    mode: Literal["raise", "coerce"] = "coerce",
    ignore_columns: set[str] | None = None,
    include_columns: set[str] | None = None,
    column_overrides: Mapping[str, PolarsColumnOverride] | None = None,
    # PolarsConverterConfig options
    fallback_type: pl.DataType | None = None,
) -> pl.Schema:
    """Convert a `YadsSpec` to a `polars.Schema`.

    Args:
        spec: The validated yads specification to convert.
        mode: Conversion mode. "raise" raises on unsupported features;
            "coerce" adjusts with warnings. Defaults to "coerce".
        ignore_columns: Columns to exclude from conversion.
        include_columns: If provided, only these columns are included.
        column_overrides: Per-column custom conversion callables.
        fallback_type: Fallback Polars data type used in coerce mode for unsupported types.
            When set, overrides the default built-in `pl.String`. Defaults to None.

    Returns:
        A `polars.Schema` instance.
    """
    from . import polars_converter

    config = polars_converter.PolarsConverterConfig(
        mode=mode,
        ignore_columns=frozenset(ignore_columns) if ignore_columns else frozenset[str](),
        include_columns=frozenset(include_columns) if include_columns else None,
        column_overrides=cast(Mapping[str, PolarsColumnOverride], column_overrides or {}),
        fallback_type=fallback_type,
    )
    return polars_converter.PolarsConverter(config).convert(spec)
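A minimal usage sketch for the Polars helper, reusing the shared column-scope option shown earlier (output omitted):

import yads

spec = yads.from_yaml("docs/src/specs/submissions.yaml")

schema = yads.to_polars(
    spec,
    include_columns={"submission_id", "submitted_at"},
)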

to_pyspark(spec, *, mode='coerce', ignore_columns=None, include_columns=None, column_overrides=None, fallback_type=None)

Convert a YadsSpec to a PySpark StructType.

Parameters:

- spec (YadsSpec, required): The validated yads specification to convert.
- mode (Literal['raise', 'coerce'], default 'coerce'): Conversion mode. "raise" raises on unsupported features; "coerce" adjusts with warnings.
- ignore_columns (set[str] | None, default None): Columns to exclude from conversion.
- include_columns (set[str] | None, default None): If provided, only these columns are included.
- column_overrides (Mapping[str, PySparkColumnOverride] | None, default None): Per-column custom conversion callables.
- fallback_type (DataType | None, default None): Fallback PySpark data type used in coerce mode for unsupported types. When set, overrides the default built-in StringType().

Returns:

- StructType: A PySpark StructType instance.

Source code in src/yads/converters/__init__.py
@requires_dependency("pyspark", min_version="3.1.1", import_name="pyspark.sql.types")
def to_pyspark(
    spec: YadsSpec,
    *,
    # BaseConverterConfig options
    mode: Literal["raise", "coerce"] = "coerce",
    ignore_columns: set[str] | None = None,
    include_columns: set[str] | None = None,
    column_overrides: Mapping[str, PySparkColumnOverride] | None = None,
    # PySparkConverterConfig options
    fallback_type: DataType | None = None,
) -> StructType:
    """Convert a `YadsSpec` to a PySpark `StructType`.

    Args:
        spec: The validated yads specification to convert.
        mode: Conversion mode. "raise" raises on unsupported features;
            "coerce" adjusts with warnings. Defaults to "coerce".
        ignore_columns: Columns to exclude from conversion.
        include_columns: If provided, only these columns are included.
        column_overrides: Per-column custom conversion callables.
        fallback_type: Fallback PySpark data type used in coerce mode for unsupported types.
            When set, overrides the default built-in `StringType()`. Defaults to None.

    Returns:
        A PySpark `StructType` instance.
    """
    from . import pyspark_converter

    config = pyspark_converter.PySparkConverterConfig(
        mode=mode,
        ignore_columns=frozenset(ignore_columns) if ignore_columns else frozenset[str](),
        include_columns=frozenset(include_columns) if include_columns else None,
        column_overrides=cast(
            Mapping[str, PySparkColumnOverride], column_overrides or {}
        ),
        fallback_type=fallback_type,
    )
    return pyspark_converter.PySparkConverter(config).convert(spec)
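A minimal usage sketch for the PySpark helper; the explicit fallback_type is illustrative, since StringType() is already the built-in default in coerce mode:

import yads
from pyspark.sql.types import StringType

spec = yads.from_yaml("docs/src/specs/submissions.yaml")

struct_type = yads.to_pyspark(
    spec,
    mode="coerce",
    fallback_type=StringType(),
)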

to_sql(spec, *, dialect='spark', mode='coerce', ignore_columns=None, include_columns=None, column_overrides=None, if_not_exists=False, or_replace=False, ignore_catalog=False, ignore_database=False, fallback_type=None, **sql_options)

Convert a YadsSpec to SQL DDL.

This facade routes to the appropriate SQL converter based on dialect and forwards AST-level options to the underlying SQLGlot-based converter.

Parameters:

- spec (YadsSpec, required): The validated yads specification to convert.
- dialect (Literal['spark', 'duckdb'], default 'spark'): Target dialect. Supported: "spark", "duckdb".
- mode (Literal['raise', 'coerce'], default 'coerce'): Conversion mode. "raise" or "coerce".
- ignore_columns (set[str] | None, default None): Columns to exclude from conversion.
- include_columns (set[str] | None, default None): If provided, only these columns are included.
- column_overrides (Mapping[str, SQLGlotColumnOverride] | None, default None): Per-column custom AST conversion callables.
- if_not_exists (bool, default False): Emit CREATE TABLE IF NOT EXISTS.
- or_replace (bool, default False): Emit CREATE OR REPLACE TABLE.
- ignore_catalog (bool, default False): Omit catalog from fully qualified table names.
- ignore_database (bool, default False): Omit database from fully qualified table names.
- fallback_type (Type | None, default None): Fallback SQL data type used in coerce mode for unsupported types.
- **sql_options (Any, default {}): Additional formatting options forwarded to sqlglot's sql().

Returns:

- str: SQL DDL string for a CREATE TABLE statement.

Raises:

- ValueError: If an unsupported dialect is provided.

Source code in src/yads/converters/__init__.py
@requires_dependency("sqlglot", min_version="27.0.0", import_name="sqlglot")
def to_sql(
    spec: YadsSpec,
    *,
    # Dialect routing
    dialect: Literal["spark", "duckdb"] = "spark",
    # BaseConverterConfig options (applied to AST converter)
    mode: Literal["raise", "coerce"] = "coerce",
    ignore_columns: set[str] | None = None,
    include_columns: set[str] | None = None,
    column_overrides: Mapping[str, SQLGlotColumnOverride] | None = None,
    # SQLGlotConverterConfig options
    if_not_exists: bool = False,
    or_replace: bool = False,
    ignore_catalog: bool = False,
    ignore_database: bool = False,
    fallback_type: exp.DataType.Type | None = None,
    # SQL serialization options to forward to sqlglot (e.g., pretty=True)
    **sql_options: Any,
) -> str:
    """Convert a `YadsSpec` to SQL DDL.

    This facade routes to the appropriate SQL converter based on `dialect` and
    forwards AST-level options to the underlying SQLGlot-based converter.

    Args:
        spec: The validated yads specification to convert.
        dialect: Target dialect. Supported: "spark", "duckdb".
        mode: Conversion mode. "raise" or "coerce". Defaults to "coerce".
        ignore_columns: Columns to exclude from conversion.
        include_columns: If provided, only these columns are included.
        column_overrides: Per-column custom AST conversion callables.
        if_not_exists: Emit CREATE TABLE IF NOT EXISTS.
        or_replace: Emit CREATE OR REPLACE TABLE.
        ignore_catalog: Omit catalog from fully qualified table names.
        ignore_database: Omit database from fully qualified table names.
        fallback_type: Fallback SQL data type used in coerce mode for unsupported types.
            Defaults to None.
        **sql_options: Additional formatting options forwarded to sqlglot's `sql()`.

    Returns:
        SQL DDL string for a CREATE TABLE statement.

    Raises:
        ValueError: If an unsupported dialect is provided.
    """
    from .sql.ast_converter import SQLGlotConverterConfig
    from .sql.sql_converter import SparkSQLConverter, DuckdbSQLConverter

    ast_config = SQLGlotConverterConfig(
        mode=mode,
        ignore_columns=frozenset(ignore_columns) if ignore_columns else frozenset[str](),
        include_columns=frozenset(include_columns) if include_columns else None,
        column_overrides=cast(
            Mapping[str, SQLGlotColumnOverride], column_overrides or {}
        ),
        if_not_exists=if_not_exists,
        or_replace=or_replace,
        ignore_catalog=ignore_catalog,
        ignore_database=ignore_database,
        fallback_type=fallback_type,
    )

    converter: SQLConverter
    match dialect:
        case "spark":
            converter = SparkSQLConverter(mode=mode, ast_config=ast_config)
        case "duckdb":
            converter = DuckdbSQLConverter(mode=mode, ast_config=ast_config)
        case _:
            raise ValueError("Unsupported SQL dialect. Expected 'spark' or 'duckdb'.")

    return converter.convert(spec, **sql_options)
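A minimal usage sketch for the SQL helper; pretty=True is one of the formatting options forwarded to sqlglot's sql() via **sql_options (generated DDL omitted):

import yads

spec = yads.from_yaml("docs/src/specs/submissions.yaml")

ddl = yads.to_sql(
    spec,
    dialect="duckdb",
    if_not_exists=True,
    ignore_catalog=True,
    pretty=True,  # forwarded to sqlglot's sql()
)
print(ddl)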