Skip to content

Commit f157e1a

Browse files
fix(telemetry): Single project identifier (kedro-org#701)
* Moved pyproject config name to constant Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Implemented _get_or_create_project_uuid Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Refactored _get_project_properties Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed tests from hanging Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed _is_known_ci_env Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed test_before_command_run Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed TestKedroTelemetryCLIHooks Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed TestKedroTelemetryProjectHooks Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed writing to pyproject.toml Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed write mock Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Removed debug output Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Refactored _add_tool_properties Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Added debug message when pyproject_path does not exist Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Updated release notes Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Return None as project UUID in case of not generated Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed pre-commit errors Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Updated the way project UUID is stored Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Renamed project_uuid -> project_id Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Check if pyproject file relates to kedro Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Changed debug message as suggested Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Added OSError handling Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> * Fixed unit test Signed-off-by: Elena Khaustova <ymax70rus@gmail.com> --------- Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>
1 parent 34cdffc commit f157e1a

File tree

3 files changed

+109
-34
lines changed

3 files changed

+109
-34
lines changed

kedro-telemetry/RELEASE.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# Upcoming release
2+
* Updated the plugin to generate a unique project UUID for each Kedro project and store it in `pyproject.toml`.
23

34
# Release 0.4.0
4-
* Updated the plugin to generate an unique UUID for each user of `kedro-telemetry`.
5+
* Updated the plugin to generate a unique UUID for each user of `kedro-telemetry`.
56
* Added support for Python 3.12.
67

78
# Release 0.3.2

kedro-telemetry/kedro_telemetry/plugin.py

+72-23
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
}
4646
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
4747
CONFIG_FILENAME = "telemetry.toml"
48+
PYPROJECT_CONFIG_NAME = "pyproject.toml"
49+
UNDEFINED_PACKAGE_NAME = "undefined_package_name"
4850

4951
logger = logging.getLogger(__name__)
5052

@@ -78,10 +80,63 @@ def _get_or_create_uuid() -> str:
7880
return ""
7981

8082

83+
def _get_or_create_project_id(pyproject_path: Path) -> str | None:
    """
    Reads a project id from a configuration file or generates and saves a new one if not present.

    Args:
        pyproject_path: Path to the project's ``pyproject.toml`` file.

    Returns:
        The stored project id, or a newly generated one after it has been
        appended to the file. None if the file cannot be read, parsed or
        updated, or if it does not contain a ``[tool.kedro]`` section.
    """
    try:
        # Open read-only first: when the id is already stored no write access
        # is needed, so a write-protected pyproject.toml still yields its id.
        # TOML documents are UTF-8 by specification.
        with open(pyproject_path, encoding="utf-8") as file:
            pyproject_data = toml.load(file)
    except (OSError, ValueError) as exc:
        # ValueError covers both TOML decoding errors and invalid UTF-8.
        logger.error(f"Failed to read the file: {str(pyproject_path)}.\n{str(exc)}")
        return None

    tool_section = pyproject_data.get("tool", {})

    # Check if pyproject related to kedro
    if "kedro" not in tool_section:
        logger.error(
            f"Failed to retrieve project id or save project id: "
            f"{str(pyproject_path)} does not contain a [tool.kedro] section"
        )
        return None

    telemetry_section = tool_section.get("kedro_telemetry")
    if telemetry_section is not None:
        if "project_id" in telemetry_section:
            return telemetry_section["project_id"]
        # The table exists but has no id. Appending a second
        # [tool.kedro_telemetry] header would define the table twice, which
        # is invalid TOML and would corrupt the user's pyproject.toml.
        logger.error(
            f"Failed to save project id: {str(pyproject_path)} already contains "
            f"a [tool.kedro_telemetry] section without a project_id"
        )
        return None

    project_id = uuid.uuid4().hex
    # Leading newline keeps the new table header on its own line even when
    # the file does not end with a newline.
    toml_string = f'\n[tool.kedro_telemetry]\nproject_id = "{project_id}"\n'
    try:
        with open(pyproject_path, "a", encoding="utf-8") as file:
            file.write(toml_string)
    except OSError as exc:
        # An id that cannot be persisted would change on every run, so it is
        # reported as missing instead.
        logger.error(
            f"Failed to save project id to the file: {str(pyproject_path)}.\n{str(exc)}"
        )
        return None

    return project_id
113+
114+
115+
def _add_tool_properties(
116+
properties: dict[str, Any], pyproject_path: Path
117+
) -> dict[str, Any]:
118+
"""
119+
Extends project properties with tool's properties.
120+
"""
121+
if pyproject_path.exists():
122+
with open(pyproject_path) as file:
123+
pyproject_data = toml.load(file)
124+
125+
try:
126+
tool_kedro = pyproject_data["tool"]["kedro"]
127+
if "tools" in tool_kedro:
128+
properties["tools"] = ", ".join(tool_kedro["tools"])
129+
if "example_pipeline" in tool_kedro:
130+
properties["example_pipeline"] = tool_kedro["example_pipeline"]
131+
except KeyError:
132+
pass
133+
134+
return properties
135+
136+
81137
def _generate_new_uuid(full_path: str) -> str:
82138
try:
83-
config: dict[str, dict[str, Any]] = {}
84-
config["telemetry"] = {}
139+
config: dict[str, dict[str, Any]] = {"telemetry": {}}
85140
new_uuid = uuid.uuid4().hex
86141
config["telemetry"]["uuid"] = new_uuid
87142

@@ -126,7 +181,7 @@ def before_command_run(
126181
logger.debug("You have opted into product usage analytics.")
127182
user_uuid = _get_or_create_uuid()
128183
project_properties = _get_project_properties(
129-
user_uuid, project_metadata.project_path
184+
user_uuid, project_metadata.project_path / PYPROJECT_CONFIG_NAME
130185
)
131186
cli_properties = _format_user_cli_data(
132187
project_properties, masked_command_args
@@ -177,7 +232,9 @@ def after_catalog_created(self, catalog):
177232
default_pipeline = pipelines.get("__default__") # __default__
178233
user_uuid = _get_or_create_uuid()
179234

180-
project_properties = _get_project_properties(user_uuid, self.project_path)
235+
project_properties = _get_project_properties(
236+
user_uuid, self.project_path / PYPROJECT_CONFIG_NAME
237+
)
181238

182239
project_statistics_properties = _format_project_statistics_data(
183240
project_properties, catalog, default_pipeline, pipelines
@@ -189,40 +246,32 @@ def after_catalog_created(self, catalog):
189246
)
190247

191248

192-
def _is_known_ci_env(known_ci_env_var_keys=KNOWN_CI_ENV_VAR_KEYS):
249+
def _is_known_ci_env(known_ci_env_var_keys: set[str]):
193250
# Most CI tools will set the CI environment variable to true
194251
if os.getenv("CI") == "true":
195252
return True
196253
# Not all CI tools follow this convention, we can check through those that don't
197254
return any(os.getenv(key) for key in known_ci_env_var_keys)
198255

199256

200-
def _get_project_properties(user_uuid: str, project_path: Path) -> dict:
201-
hashed_package_name = _hash(str(PACKAGE_NAME)) if PACKAGE_NAME else "undefined"
257+
def _get_project_properties(user_uuid: str, pyproject_path: Path) -> dict:
258+
project_id = _get_or_create_project_id(pyproject_path)
259+
package_name = PACKAGE_NAME or UNDEFINED_PACKAGE_NAME
260+
hashed_project_id = (
261+
_hash(f"{project_id}{package_name}") if project_id is not None else None
262+
)
263+
202264
properties = {
203265
"username": user_uuid,
204-
"package_name": hashed_package_name,
266+
"project_id": hashed_project_id,
205267
"project_version": KEDRO_VERSION,
206268
"telemetry_version": TELEMETRY_VERSION,
207269
"python_version": sys.version,
208270
"os": sys.platform,
209-
"is_ci_env": _is_known_ci_env(),
271+
"is_ci_env": _is_known_ci_env(KNOWN_CI_ENV_VAR_KEYS),
210272
}
211-
pyproject_path = Path(project_path) / "pyproject.toml"
212-
if pyproject_path.exists():
213-
with open(pyproject_path) as file:
214-
pyproject_data = toml.load(file)
215273

216-
if "tool" in pyproject_data and "kedro" in pyproject_data["tool"]:
217-
if "tools" in pyproject_data["tool"]["kedro"]:
218-
# convert list of tools to comma-separated string
219-
properties["tools"] = ", ".join(
220-
pyproject_data["tool"]["kedro"]["tools"]
221-
)
222-
if "example_pipeline" in pyproject_data["tool"]["kedro"]:
223-
properties["example_pipeline"] = pyproject_data["tool"]["kedro"][
224-
"example_pipeline"
225-
]
274+
properties = _add_tool_properties(properties, pyproject_path)
226275

227276
return properties
228277

kedro-telemetry/tests/test_plugin.py

+35-10
Original file line numberDiff line numberDiff line change
@@ -134,14 +134,18 @@ def test_before_command_run(self, mocker, fake_metadata):
134134
"kedro_telemetry.plugin._get_or_create_uuid",
135135
return_value="user_uuid",
136136
)
137+
mocker.patch(
138+
"kedro_telemetry.plugin._get_or_create_project_id",
139+
return_value="project_id",
140+
)
137141

138142
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
139143
telemetry_hook = KedroTelemetryCLIHooks()
140144
command_args = ["--version"]
141145
telemetry_hook.before_command_run(fake_metadata, command_args)
142146
expected_properties = {
143147
"username": "user_uuid",
144-
"package_name": "digested",
148+
"project_id": "digested",
145149
"project_version": kedro_version,
146150
"telemetry_version": TELEMETRY_VERSION,
147151
"python_version": sys.version,
@@ -180,6 +184,10 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata):
180184
"kedro_telemetry.plugin._get_or_create_uuid",
181185
return_value="user_uuid",
182186
)
187+
mocker.patch(
188+
"kedro_telemetry.plugin._get_or_create_project_id",
189+
return_value="project_id",
190+
)
183191

184192
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
185193
mocker.patch("builtins.open", mocker.mock_open(read_data=MOCK_PYPROJECT_TOOLS))
@@ -189,7 +197,7 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata):
189197
telemetry_hook.before_command_run(fake_metadata, command_args)
190198
expected_properties = {
191199
"username": "user_uuid",
192-
"package_name": "digested",
200+
"project_id": "digested",
193201
"project_version": kedro_version,
194202
"telemetry_version": TELEMETRY_VERSION,
195203
"python_version": sys.version,
@@ -230,14 +238,18 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata):
230238
"kedro_telemetry.plugin._get_or_create_uuid",
231239
return_value="user_uuid",
232240
)
241+
mocker.patch(
242+
"kedro_telemetry.plugin._get_or_create_project_id",
243+
return_value="project_id",
244+
)
233245

234246
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
235247
telemetry_hook = KedroTelemetryCLIHooks()
236248
command_args = []
237249
telemetry_hook.before_command_run(fake_metadata, command_args)
238250
expected_properties = {
239251
"username": "user_uuid",
240-
"package_name": "digested",
252+
"project_id": "digested",
241253
"project_version": kedro_version,
242254
"telemetry_version": TELEMETRY_VERSION,
243255
"python_version": sys.version,
@@ -300,7 +312,7 @@ def test_before_command_run_anonymous(self, mocker, fake_metadata):
300312
mocked_anon_id = mocker.patch("kedro_telemetry.plugin._hash")
301313
mocked_anon_id.return_value = "digested"
302314
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
303-
mocker.patch("builtins.open", side_effect=Exception)
315+
mocker.patch("builtins.open", side_effect=OSError)
304316

305317
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
306318
telemetry_hook = KedroTelemetryCLIHooks()
@@ -309,7 +321,7 @@ def test_before_command_run_anonymous(self, mocker, fake_metadata):
309321
expected_properties = {
310322
"username": "",
311323
"command": "kedro --version",
312-
"package_name": "digested",
324+
"project_id": None,
313325
"project_version": kedro_version,
314326
"telemetry_version": TELEMETRY_VERSION,
315327
"python_version": sys.version,
@@ -481,9 +493,15 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913
481493
"kedro_telemetry.plugin._get_or_create_uuid",
482494
return_value="user_uuid",
483495
)
496+
mocker.patch(
497+
"kedro_telemetry.plugin._get_or_create_project_id",
498+
return_value="project_id",
499+
)
500+
484501
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
485502
mocker.patch("kedro_telemetry.plugin.open")
486503
mocker.patch("kedro_telemetry.plugin.toml.load")
504+
mocker.patch("kedro_telemetry.plugin.toml.dump")
487505

488506
# Without CLI invoked - i.e. `session.run` in Jupyter/IPython
489507
telemetry_hook = KedroTelemetryProjectHooks()
@@ -492,7 +510,7 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913
492510

493511
project_properties = {
494512
"username": "user_uuid",
495-
"package_name": "digested",
513+
"project_id": "digested",
496514
"project_version": kedro_version,
497515
"telemetry_version": TELEMETRY_VERSION,
498516
"python_version": sys.version,
@@ -505,7 +523,6 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913
505523
"number_of_pipelines": 2,
506524
}
507525
expected_properties = {**project_properties, **project_statistics}
508-
509526
expected_call = mocker.call(
510527
event_name="Kedro Project Statistics",
511528
identity="user_uuid",
@@ -537,8 +554,13 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913
537554
"kedro_telemetry.plugin._get_or_create_uuid",
538555
return_value="user_uuid",
539556
)
557+
mocker.patch(
558+
"kedro_telemetry.plugin._get_or_create_project_id",
559+
return_value="project_id",
560+
)
540561
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
541562
mocker.patch("kedro_telemetry.plugin.toml.load")
563+
mocker.patch("kedro_telemetry.plugin.toml.dump")
542564
# CLI run first
543565
telemetry_cli_hook = KedroTelemetryCLIHooks()
544566
command_args = ["--version"]
@@ -551,7 +573,7 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913
551573

552574
project_properties = {
553575
"username": "user_uuid",
554-
"package_name": "digested",
576+
"project_id": "digested",
555577
"project_version": kedro_version,
556578
"telemetry_version": TELEMETRY_VERSION,
557579
"python_version": sys.version,
@@ -596,6 +618,10 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913
596618
"kedro_telemetry.plugin._get_or_create_uuid",
597619
return_value="user_uuid",
598620
)
621+
mocker.patch(
622+
"kedro_telemetry.plugin._get_or_create_project_id",
623+
return_value="project_id",
624+
)
599625
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
600626
mocker.patch("builtins.open", mocker.mock_open(read_data=MOCK_PYPROJECT_TOOLS))
601627
mocker.patch("pathlib.Path.exists", return_value=True)
@@ -612,7 +638,7 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913
612638

613639
project_properties = {
614640
"username": "user_uuid",
615-
"package_name": "digested",
641+
"project_id": "digested",
616642
"project_version": kedro_version,
617643
"telemetry_version": TELEMETRY_VERSION,
618644
"python_version": sys.version,
@@ -633,7 +659,6 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913
633659
identity="user_uuid",
634660
properties=expected_properties,
635661
)
636-
637662
# CLI hook makes the first 2 calls, the 3rd one is the Project hook
638663
assert mocked_heap_call.call_args_list[2] == expected_call
639664

0 commit comments

Comments
 (0)