From 4970ec174b27968486344d221d001c78c6253faa Mon Sep 17 00:00:00 2001 From: Lilferrit Date: Thu, 17 Oct 2024 15:37:43 -0700 Subject: [PATCH] Spectrum datasets save full path in peak_file field --- depthcharge/data/parsers.py | 2 +- tests/unit_tests/test_data/test_datasets.py | 4 ++-- tests/unit_tests/test_data/test_parsers.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/depthcharge/data/parsers.py b/depthcharge/data/parsers.py index 79ba27a..0cd0cbe 100644 --- a/depthcharge/data/parsers.py +++ b/depthcharge/data/parsers.py @@ -197,7 +197,7 @@ def iter_batches(self, batch_size: int | None) -> pa.RecordBatch: parsed = processor(parsed) entry = { - "peak_file": self.peak_file.name, + "peak_file": str(self.peak_file.resolve()), "scan_id": str(parsed.scan_id), "ms_level": parsed.ms_level, "precursor_mz": parsed.precursor_mz, diff --git a/tests/unit_tests/test_data/test_datasets.py b/tests/unit_tests/test_data/test_datasets.py index f9b7237..6fb06a2 100644 --- a/tests/unit_tests/test_data/test_datasets.py +++ b/tests/unit_tests/test_data/test_datasets.py @@ -47,7 +47,7 @@ def test_indexing(tokenizer, mgf_small, tmp_path): spec = dataset[0] assert len(spec) == 7 - assert spec["peak_file"] == ["small.mgf"] + assert spec["peak_file"] == [str(mgf_small.resolve())] assert spec["scan_id"] == ["0"] assert spec["ms_level"].item() == 2 assert (spec["precursor_mz"].item() - 416.2448) < 0.001 @@ -123,7 +123,7 @@ def test_load(tokenizer, tmp_path, mgf_small): dataset = SpectrumDataset.from_lance(db_path, 1) spec = dataset[0] assert len(spec) == 8 - assert spec["peak_file"] == ["small.mgf"] + assert spec["peak_file"] == [str(mgf_small.resolve())] assert spec["scan_id"] == ["0"] assert spec["ms_level"] == 2 assert (spec["precursor_mz"] - 416.2448) < 0.001 diff --git a/tests/unit_tests/test_data/test_parsers.py b/tests/unit_tests/test_data/test_parsers.py index b730a77..c353ab2 100644 --- a/tests/unit_tests/test_data/test_parsers.py +++ b/tests/unit_tests/test_data/test_parsers.py @@ -71,7 +71,7 @@ def test_mgf_and_base(mgf_small): ) expected = pl.DataFrame( { - "peak_file": [mgf_small.name] * 2, + "peak_file": [str(mgf_small.resolve())] * 2, "scan_id": ["0", "1"], "ms_level": [2, 2], "precursor_mz": [416.24474357, 257.464565],