File tree 4 files changed +43
-1
lines changed
4 files changed +43
-1
lines changed Original file line number Diff line number Diff line change 9
9
| ` langchain.ChatCohereDataset ` | A dataset for loading a ChatCohere langchain model. | ` kedro_datasets_experimental.langchain ` |
10
10
| ` langchain.OpenAIEmbeddingsDataset ` | A dataset for loading an OpenAIEmbeddings langchain model. | ` kedro_datasets_experimental.langchain ` |
11
11
| ` langchain.ChatOpenAIDataset ` | A dataset for loading a ChatOpenAI langchain model. | ` kedro_datasets_experimental.langchain ` |
12
+ * Extended preview feature to ` yaml.YAMLDataset `.
13
+
14
+ ## Community contributions
15
+
16
+ Many thanks to the following Kedroids for contributing PRs to this release:
17
+ * [Lukas Innig](https://github.com/derluke)
12
18
13
19
14
20
# Release 3.0.1
Original file line number Diff line number Diff line change 1
1
"""``YAMLDataset`` loads/saves data from/to a YAML file using an underlying
2
2
filesystem (e.g.: local, S3, GCS). It uses PyYAML to handle the YAML file.
3
3
"""
4
+
4
5
from __future__ import annotations
5
6
7
+ import json
6
8
from copy import deepcopy
7
9
from pathlib import PurePosixPath
8
10
from typing import Any
17
19
get_protocol_and_path ,
18
20
)
19
21
22
+ from kedro_datasets ._typing import JSONPreview
23
+
20
24
21
25
class YAMLDataset (AbstractVersionedDataset [dict , dict ]):
22
26
"""``YAMLDataset`` loads/saves data from/to a YAML file using an underlying
@@ -157,3 +161,14 @@ def _invalidate_cache(self) -> None:
157
161
"""Invalidate underlying filesystem caches."""
158
162
filepath = get_filepath_str (self ._filepath , self ._protocol )
159
163
self ._fs .invalidate_cache (filepath )
164
+
165
+ def preview (self ) -> JSONPreview :
166
+ """
167
+ Generate a preview of the YAML dataset by serialising its full loaded contents to JSON.
168
+
169
+ Returns:
170
+ A JSON string (wrapped as ``JSONPreview``) representing the loaded YAML data for previewing.
171
+ """
172
+ data = self ._load ()
173
+
174
+ return JSONPreview (json .dumps (data ))
Original file line number Diff line number Diff line change @@ -229,7 +229,7 @@ test = [
229
229
" pyarrow>=7.0; python_version >= '3.11'" , # Adding to avoid numpy build errors
230
230
" pyodbc~=5.0" ,
231
231
" pyproj~=3.0" ,
232
- " pyspark>=3.0, <3.4 ; python_version < '3.11'" ,
232
+ " pyspark>=3.0; python_version < '3.11'" ,
233
233
" pyspark>=3.4; python_version >= '3.11'" ,
234
234
" pytest-cov~=3.0" ,
235
235
" pytest-mock>=1.7.1, <2.0" ,
Original file line number Diff line number Diff line change
1
+ import inspect
2
+ import json
1
3
from pathlib import Path , PurePosixPath
2
4
3
5
import pandas as pd
4
6
import pytest
7
+ import yaml
5
8
from fsspec .implementations .http import HTTPFileSystem
6
9
from fsspec .implementations .local import LocalFileSystem
7
10
from gcsfs import GCSFileSystem
@@ -207,3 +210,21 @@ def test_versioning_existing_dataset(
207
210
Path (yaml_dataset ._filepath .as_posix ()).unlink ()
208
211
versioned_yaml_dataset .save (dummy_data )
209
212
assert versioned_yaml_dataset .exists ()
213
+
214
+ def test_preview (self , yaml_dataset , dummy_data ):
215
+ """Test the preview method."""
216
+ yaml_dataset .save (dummy_data )
217
+ preview_data = yaml_dataset .preview ()
218
+
219
+ # Load the data directly for comparison
220
+ with yaml_dataset ._fs .open (yaml_dataset ._get_load_path (), mode = "r" ) as fs_file :
221
+ full_data = yaml .safe_load (fs_file )
222
+
223
+ expected_data = json .dumps (full_data )
224
+
225
+ assert (
226
+ preview_data == expected_data
227
+ ), "The preview data does not match the expected data."
228
+ assert (
229
+ inspect .signature (yaml_dataset .preview ).return_annotation == "JSONPreview"
230
+ )
You can’t perform that action at this time.
0 commit comments