Skip to content

Commit a9ebbc5

Browse files
derluke, ankatiyar
authored and committed
feat: Adding preview support to yaml datasets (kedro-org#718)
* Adding preview support to yaml datasets Signed-off-by: Lukas Innig <lukas.innig@datarobot.com> * added test Signed-off-by: Lukas Innig <lukas.innig@datarobot.com> * fix the test Signed-off-by: Lukas Innig <lukas.innig@datarobot.com> * formatting Signed-off-by: Lukas Innig <lukas.innig@datarobot.com> * Update pyspark Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com> * Add release notes Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com> --------- Signed-off-by: Lukas Innig <lukas.innig@datarobot.com> Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com> Co-authored-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Co-authored-by: Ankita Katiyar <ankitakatiyar2401@gmail.com> Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>
1 parent cd3c006 commit a9ebbc5

File tree

4 files changed

+43
-1
lines changed

4 files changed

+43
-1
lines changed

kedro-datasets/RELEASE.md

+6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
| `langchain.ChatCohereDataset` | A dataset for loading a ChatCohere langchain model. | `kedro_datasets_experimental.langchain` |
1010
| `langchain.OpenAIEmbeddingsDataset` | A dataset for loading an OpenAIEmbeddings langchain model. | `kedro_datasets_experimental.langchain` |
1111
| `langchain.ChatOpenAIDataset` | A dataset for loading a ChatOpenAI langchain model. | `kedro_datasets_experimental.langchain` |
12+
* Extended preview feature to `yaml.YAMLDataset`.
13+
14+
## Community contributions
15+
16+
Many thanks to the following Kedroids for contributing PRs to this release:
17+
* [Lukas Innig](https://github.com/derluke)
1218

1319

1420
# Release 3.0.1

kedro-datasets/kedro_datasets/yaml/yaml_dataset.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
"""``YAMLDataset`` loads/saves data from/to a YAML file using an underlying
22
filesystem (e.g.: local, S3, GCS). It uses PyYAML to handle the YAML file.
33
"""
4+
45
from __future__ import annotations
56

7+
import json
68
from copy import deepcopy
79
from pathlib import PurePosixPath
810
from typing import Any
@@ -17,6 +19,8 @@
1719
get_protocol_and_path,
1820
)
1921

22+
from kedro_datasets._typing import JSONPreview
23+
2024

2125
class YAMLDataset(AbstractVersionedDataset[dict, dict]):
2226
"""``YAMLDataset`` loads/saves data from/to a YAML file using an underlying
@@ -157,3 +161,14 @@ def _invalidate_cache(self) -> None:
157161
"""Invalidate underlying filesystem caches."""
158162
filepath = get_filepath_str(self._filepath, self._protocol)
159163
self._fs.invalidate_cache(filepath)
164+
165+
def preview(self) -> JSONPreview:
    """Generate a JSON preview of the whole YAML dataset.

    The dataset is loaded via ``_load`` and serialised with ``json.dumps``.
    Values that have no native JSON representation (for example the
    date/datetime objects a YAML loader may produce for timestamp
    scalars) are rendered with ``str`` instead of raising ``TypeError``,
    so a preview can always be produced for loadable data.

    Returns:
        A JSON-formatted string representing the YAML data for previewing.
    """
    data = self._load()

    # ``default`` is only consulted for objects ``json`` cannot encode
    # natively, so the output for plain dict/list/str/number data is
    # identical to a bare ``json.dumps(data)``.
    return JSONPreview(json.dumps(data, default=str))

kedro-datasets/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ test = [
229229
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
230230
"pyodbc~=5.0",
231231
"pyproj~=3.0",
232-
"pyspark>=3.0, <3.4; python_version < '3.11'",
232+
"pyspark>=3.0; python_version < '3.11'",
233233
"pyspark>=3.4; python_version >= '3.11'",
234234
"pytest-cov~=3.0",
235235
"pytest-mock>=1.7.1, <2.0",

kedro-datasets/tests/yaml/test_yaml_dataset.py

+21
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import inspect
2+
import json
13
from pathlib import Path, PurePosixPath
24

35
import pandas as pd
46
import pytest
7+
import yaml
58
from fsspec.implementations.http import HTTPFileSystem
69
from fsspec.implementations.local import LocalFileSystem
710
from gcsfs import GCSFileSystem
@@ -207,3 +210,21 @@ def test_versioning_existing_dataset(
207210
Path(yaml_dataset._filepath.as_posix()).unlink()
208211
versioned_yaml_dataset.save(dummy_data)
209212
assert versioned_yaml_dataset.exists()
213+
214+
def test_preview(self, yaml_dataset, dummy_data):
    """Check that ``preview`` returns the saved data serialised as JSON."""
    yaml_dataset.save(dummy_data)
    actual_preview = yaml_dataset.preview()

    # Read the file straight from the underlying filesystem as a reference.
    load_path = yaml_dataset._get_load_path()
    with yaml_dataset._fs.open(load_path, mode="r") as handle:
        reference = yaml.safe_load(handle)

    assert actual_preview == json.dumps(
        reference
    ), "The preview data does not match the expected data."

    # The module uses postponed annotations, so the signature exposes the
    # return annotation as a plain string.
    return_annotation = inspect.signature(yaml_dataset.preview).return_annotation
    assert return_annotation == "JSONPreview"

0 commit comments

Comments
 (0)