Skip to content

Loaders

Load external schemas or YAML specs into a canonical YadsSpec.

import pyarrow as pa
import yads

# Describe the source table with PyArrow; only `submission_id` is declared
# non-nullable.
submission_fields = [
    pa.field("submission_id", pa.int64(), nullable=False),
    pa.field("completion_percent", pa.decimal128(5, 2)),
    pa.field("time_taken_second", pa.int32()),
    pa.field("submitted_at", pa.timestamp("ns", tz="UTC")),
]
pyarrow_schema = pa.schema(submission_fields)

# `name` and `version` are required keyword arguments of the helper.
spec = yads.from_pyarrow(pyarrow_schema, name="prod.assessments.submissions", version=1)
print(spec)
spec prod.assessments.submissions(version=1)(
  columns=[
    submission_id: integer(bits=64)(
      constraints=[NotNullConstraint()]
    )
    completion_percent: decimal(precision=5, scale=2, bits=128)
    time_taken_second: integer(bits=32)
    submitted_at: timestamptz(unit=ns, tz=UTC)
  ]
)

The following sections outline the high-level entry point functions available in yads. You can refer to the dedicated loader documentation for their complete API reference with more customization options.

| Source | Helper | Loader |
| --- | --- | --- |
| YAML (path or stream) | yads.from_yaml | yads.loaders.YamlLoader |
| PyArrow schema | yads.from_pyarrow | yads.loaders.PyArrowLoader |
| PySpark StructType | yads.from_pyspark | yads.loaders.PySparkLoader |
| Polars schema | yads.from_polars | yads.loaders.PolarsLoader |

Other YAML helpers: yads.from_yaml_string, yads.from_yaml_path, and yads.from_yaml_stream.

Shared customization options

PyArrow, PySpark, and Polars loaders expose the following arguments for shaping input into a valid spec:

  • mode selects the load mode: raise (strict) or coerce (permissive; the default for the helper functions).
  • fallback_type sets the yads type (String or Binary) substituted for unsupported source types when coercing.

Loading mode

Switch between strict (mode="raise") and permissive (mode="coerce") runs per call or via loader configuration. Here, a Dictionary column fails in raise mode.

# A dictionary-encoded column: the loader rejects this type in raise mode
# (see the error printed below).
attributes_type = pa.dictionary(index_type=pa.int32(), value_type=pa.string())
unsupported_pyarrow_schema = pa.schema(
    [
        pa.field("submission_id", pa.int64(), nullable=False),
        pa.field("attributes", attributes_type),
    ]
)

try:
    yads.from_pyarrow(
        unsupported_pyarrow_schema,
        name="prod.assessments.submissions",
        version=1,
        mode="raise",
    )
except Exception as exc:
    # Show the exception class together with its message.
    print(f"{type(exc).__name__}: {exc}")
UnsupportedFeatureError: PyArrowLoader does not support PyArrow type: 'dictionary<values=string, indices=int32, ordered=0>' (DictionaryType) for 'attributes'.

Fallback type

Provide a fallback YadsType to keep loading unsupported logical types when running in coerce mode.

import yads.types as ytypes  # provides the `String` type used as the fallback below

# Reuses `unsupported_pyarrow_schema` from the previous example. In coerce mode
# the unsupported dictionary column is loaded as the fallback type instead of
# raising.
spec = yads.from_pyarrow(
    unsupported_pyarrow_schema,
    name="prod.assessments.submissions",
    version=1,
    mode="coerce",
    fallback_type=ytypes.String(),
)
# The `attributes` column is the last one in the schema.
print(spec.columns[-1])
attributes: string

Wrapper helpers

from_yaml(source, *, encoding='utf-8')

Load a spec from a path or a file-like stream.

This convenience loader avoids ambiguity by not accepting arbitrary content strings. Pass content strings to from_yaml_string instead.

Parameters:

Name Type Description Default
source str | Path | IO[str] | IO[bytes]

A filesystem path (str or pathlib.Path) or a file-like object opened in text or binary mode.

required
encoding str

Text encoding used when reading files or decoding binary streams.

'utf-8'

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Source code in src/yads/loaders/__init__.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def from_yaml(
    source: str | Path | IO[str] | IO[bytes], *, encoding: str = "utf-8"
) -> YadsSpec:
    """Build a spec from a YAML file path or an already-open stream.

    Raw YAML text is deliberately not accepted here, since a plain string is
    always treated as a path; use `from_yaml_string` for in-memory content.

    Args:
        source: Path (`str` or `pathlib.Path`) to a YAML file, or a file-like
            object opened in text or binary mode.
        encoding: Encoding forwarded to the path or stream loader.

    Returns:
        A validated immutable `YadsSpec` instance.
    """
    # Duck-typed dispatch: anything exposing `read` is handled as a stream.
    if not hasattr(source, "read"):
        return from_yaml_path(cast(str | Path, source), encoding=encoding)
    return from_yaml_stream(cast(IO[str] | IO[bytes], source), encoding=encoding)

from_yaml_string(content)

Load a spec from YAML string content.

Parameters:

Name Type Description Default
content str

YAML content as a string.

required

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Source code in src/yads/loaders/__init__.py
102
103
104
105
106
107
108
109
110
111
def from_yaml_string(content: str) -> YadsSpec:
    """Parse YAML text held in memory into a spec.

    Args:
        content: The YAML document itself, as a string.

    Returns:
        A validated immutable `YadsSpec` instance.
    """
    yaml_loader = YamlLoader()
    return yaml_loader.load(content)

from_yaml_path(path, *, encoding='utf-8')

Load a spec from a YAML file path.

Parameters:

Name Type Description Default
path str | Path

Filesystem path to a YAML file.

required
encoding str

Text encoding used to read the file.

'utf-8'

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Raises:

Type Description
FileNotFoundError

If the file does not exist.

Source code in src/yads/loaders/__init__.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def from_yaml_path(path: str | Path, *, encoding: str = "utf-8") -> YadsSpec:
    """Read a YAML file from disk and load it as a spec.

    Args:
        path: Location of the YAML file on the filesystem.
        encoding: Text encoding used when reading the file.

    Returns:
        A validated immutable `YadsSpec` instance.

    Raises:
        FileNotFoundError: If no file exists at `path`.
    """
    yaml_file = Path(path)
    content = yaml_file.read_text(encoding=encoding)
    return YamlLoader().load(content)

from_yaml_stream(stream, *, encoding='utf-8')

Load a spec from a file-like stream.

The stream is not closed by this function.

Parameters:

Name Type Description Default
stream IO[str] | IO[bytes]

File-like object opened in text or binary mode.

required
encoding str

Used only if stream is binary.

'utf-8'

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Source code in src/yads/loaders/__init__.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def from_yaml_stream(stream: IO[str] | IO[bytes], *, encoding: str = "utf-8") -> YadsSpec:
    """Read YAML from an open file-like object and load it as a spec.

    The stream is read but never closed here; closing it is left to the caller.

    Args:
        stream: File-like object opened in text or binary mode.
        encoding: Encoding used to decode binary data; ignored for text streams.

    Returns:
        A validated immutable `YadsSpec` instance.
    """
    payload = stream.read()
    if isinstance(payload, (bytes, bytearray)):
        # Binary streams hand back bytes, which are decoded before parsing.
        payload = payload.decode(encoding)
    return YamlLoader().load(payload)

from_dict(data)

Load a YadsSpec from a dictionary.

Parameters:

Name Type Description Default
data dict[str, Any]

The dictionary representation of the spec.

required

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Example
data = {
    "name": "users",
    "version": 1,
    "columns": [
        {
            "name": "id",
            "type": "integer",
        },
        {
            "name": "email",
            "type": "string",
        }
    ]
}
spec = from_dict(data)
Source code in src/yads/loaders/__init__.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def from_dict(data: dict[str, Any]) -> YadsSpec:
    """Build a `YadsSpec` from its dictionary representation.

    Args:
        data: The spec expressed as a dictionary.

    Returns:
        A validated immutable `YadsSpec` instance.

    Example:
        ```python
        data = {
            "name": "users",
            "version": 1,
            "columns": [
                {
                    "name": "id",
                    "type": "integer",
                },
                {
                    "name": "email",
                    "type": "string",
                }
            ]
        }
        spec = from_dict(data)
        ```
    """
    dict_loader = DictLoader()
    return dict_loader.load(data)

from_pyarrow(schema, *, mode='coerce', fallback_type=None, name, version, description=None)

Load a spec from a pyarrow.Schema.

Parameters:

Name Type Description Default
schema Any

An instance of pyarrow.Schema.

required
mode Literal['raise', 'coerce']

Loading mode. "raise" will raise exceptions on unsupported features. "coerce" will attempt to coerce unsupported features to supported ones with warnings. Defaults to "coerce".

'coerce'
fallback_type YadsType | None

A yads type to use as fallback when an unsupported PyArrow type is encountered. Only used when mode is "coerce". Must be either String or Binary, or None. Defaults to None.

None
name str

Fully-qualified spec name to assign.

required
version int

Spec version number (an integer).

required
description str | None

Optional human-readable description.

None

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Example
import pyarrow as pa
schema = pa.schema([
    pa.field("id", pa.int64()),
    pa.field("name", pa.string()),
])
spec = from_pyarrow(schema, name="users", version=1)
Source code in src/yads/loaders/__init__.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def from_pyarrow(
    schema: Any,
    *,
    mode: Literal["raise", "coerce"] = "coerce",
    fallback_type: YadsType | None = None,
    name: str,
    version: int,
    description: str | None = None,
) -> YadsSpec:
    """Convert a `pyarrow.Schema` into a spec.

    Args:
        schema: The `pyarrow.Schema` to load.
        mode: How unsupported features are handled. "raise" raises an
            exception; "coerce" (the default) tries to map them onto
            supported features and emits warnings.
        fallback_type: yads type substituted for unsupported PyArrow types.
            Only consulted when `mode` is "coerce". Must be String, Binary,
            or None (the default).
        name: Fully-qualified name to give the resulting spec.
        version: Integer version to give the resulting spec.
        description: Optional human-readable description.

    Returns:
        A validated immutable `YadsSpec` instance.

    Example:
        ```python
        import pyarrow as pa
        schema = pa.schema([
            pa.field("id", pa.int64()),
            pa.field("name", pa.string()),
        ])
        spec = from_pyarrow(schema, name="users", version=1)
        ```
    """
    # Imported at call time rather than at module import time
    # (presumably so PyArrow is only needed when this helper is used).
    from . import pyarrow_loader  # type: ignore

    loader_config = pyarrow_loader.PyArrowLoaderConfig(
        mode=mode, fallback_type=fallback_type
    )
    arrow_loader = cast(Any, pyarrow_loader.PyArrowLoader(loader_config))
    return arrow_loader.load(
        schema, name=name, version=version, description=description
    )

from_pyspark(schema, *, mode='coerce', fallback_type=None, name, version, description=None)

Load a spec from a pyspark.sql.types.StructType.

Parameters:

Name Type Description Default
schema Any

An instance of pyspark.sql.types.StructType.

required
mode Literal['raise', 'coerce']

Loading mode. "raise" will raise exceptions on unsupported features. "coerce" will attempt to coerce unsupported features to supported ones with warnings. Defaults to "coerce".

'coerce'
fallback_type YadsType | None

A yads type to use as fallback when an unsupported PySpark type is encountered. Only used when mode is "coerce". Must be either String or Binary, or None. Defaults to None.

None
name str

Fully-qualified spec name to assign.

required
version int

Spec version number (an integer).

required
description str | None

Optional human-readable description.

None

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Example
from pyspark.sql.types import StructType, StructField, LongType, StringType
schema = StructType([
    StructField("id", LongType(), nullable=False),
    StructField("name", StringType(), nullable=True),
])
spec = from_pyspark(schema, name="users", version=1)
Source code in src/yads/loaders/__init__.py
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
def from_pyspark(
    schema: Any,
    *,
    mode: Literal["raise", "coerce"] = "coerce",
    fallback_type: YadsType | None = None,
    name: str,
    version: int,
    description: str | None = None,
) -> YadsSpec:
    """Convert a `pyspark.sql.types.StructType` into a spec.

    Args:
        schema: The `pyspark.sql.types.StructType` to load.
        mode: How unsupported features are handled. "raise" raises an
            exception; "coerce" (the default) tries to map them onto
            supported features and emits warnings.
        fallback_type: yads type substituted for unsupported PySpark types.
            Only consulted when `mode` is "coerce". Must be String, Binary,
            or None (the default).
        name: Fully-qualified name to give the resulting spec.
        version: Integer version to give the resulting spec.
        description: Optional human-readable description.

    Returns:
        A validated immutable `YadsSpec` instance.

    Example:
        ```python
        from pyspark.sql.types import StructType, StructField, LongType, StringType
        schema = StructType([
            StructField("id", LongType(), nullable=False),
            StructField("name", StringType(), nullable=True),
        ])
        spec = from_pyspark(schema, name="users", version=1)
        ```
    """
    # Imported at call time rather than at module import time
    # (presumably so PySpark is only needed when this helper is used).
    from . import pyspark_loader  # type: ignore

    loader_config = pyspark_loader.PySparkLoaderConfig(
        mode=mode, fallback_type=fallback_type
    )
    spark_loader = cast(Any, pyspark_loader.PySparkLoader(loader_config))
    return spark_loader.load(
        schema, name=name, version=version, description=description
    )

from_polars(schema, *, mode='coerce', fallback_type=None, name, version, description=None)

Load a spec from a polars.Schema.

Parameters:

Name Type Description Default
schema Any

An instance of polars.Schema.

required
mode Literal['raise', 'coerce']

Loading mode. "raise" will raise exceptions on unsupported features. "coerce" will attempt to coerce unsupported features to supported ones with warnings. Defaults to "coerce".

'coerce'
fallback_type YadsType | None

A yads type to use as fallback when an unsupported Polars type is encountered. Only used when mode is "coerce". Must be either String or Binary, or None. Defaults to None.

None
name str

Fully-qualified spec name to assign.

required
version int

Spec version number (an integer).

required
description str | None

Optional human-readable description.

None

Returns:

Type Description
YadsSpec

A validated immutable YadsSpec instance.

Example
import polars as pl
schema = pl.Schema({"id": pl.Int64, "name": pl.Utf8})
spec = from_polars(schema, name="users", version=1)
Source code in src/yads/loaders/__init__.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
def from_polars(
    schema: Any,
    *,
    mode: Literal["raise", "coerce"] = "coerce",
    fallback_type: YadsType | None = None,
    name: str,
    version: int,
    description: str | None = None,
) -> YadsSpec:
    """Convert a `polars.Schema` into a spec.

    Args:
        schema: The `polars.Schema` to load.
        mode: How unsupported features are handled. "raise" raises an
            exception; "coerce" (the default) tries to map them onto
            supported features and emits warnings.
        fallback_type: yads type substituted for unsupported Polars types.
            Only consulted when `mode` is "coerce". Must be String, Binary,
            or None (the default).
        name: Fully-qualified name to give the resulting spec.
        version: Integer version to give the resulting spec.
        description: Optional human-readable description.

    Returns:
        A validated immutable `YadsSpec` instance.

    Example:
        ```python
        import polars as pl
        schema = pl.Schema({"id": pl.Int64, "name": pl.Utf8})
        spec = from_polars(schema, name="users", version=1)
        ```
    """
    # Imported at call time rather than at module import time
    # (presumably so Polars is only needed when this helper is used).
    from . import polars_loader  # type: ignore

    loader_config = polars_loader.PolarsLoaderConfig(
        mode=mode, fallback_type=fallback_type
    )
    frame_loader = cast(Any, polars_loader.PolarsLoader(loader_config))
    return frame_loader.load(
        schema, name=name, version=version, description=description
    )