From 4a152ee7c1f535d68b9d9d428f8a92124a8ac271 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 24 Apr 2023 02:05:17 -0400 Subject: [PATCH] removing json conversion memory saving technique from nodes in merging, it's slow and probably not necessary for nodes --- Common/merging.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Common/merging.py b/Common/merging.py index bc1048f5..728f5056 100644 --- a/Common/merging.py +++ b/Common/merging.py @@ -6,7 +6,7 @@ from Common.utils import quick_json_loads, quick_json_dumps, chunk_iterator NODE_PROPERTIES_THAT_SHOULD_BE_SETS = {SYNONYMS, NODE_TYPES} -EDGE_PROPERTIES_THAT_SHOULD_BE_SETS = {AGGREGATOR_KNOWLEDGE_SOURCES, PUBLICATIONS} +EDGE_PROPERTIES_THAT_SHOULD_BE_SETS = {AGGREGATOR_KNOWLEDGE_SOURCES, PUBLICATIONS, XREFS} def edge_key_function(edge): @@ -180,17 +180,16 @@ def merge_nodes(self, nodes): node_key = node['id'] if node_key in self.nodes: self.merged_node_counter += 1 - previous_node = quick_json_loads(self.nodes[node_key]) + previous_node = self.nodes[node_key] merged_node = entity_merging_function(previous_node, node, NODE_PROPERTIES_THAT_SHOULD_BE_SETS) - self.nodes[node_key] = quick_json_dumps(merged_node) + self.nodes[node_key] = merged_node else: - self.nodes[node_key] = quick_json_dumps(node) + self.nodes[node_key] = node return node_count # merge a list of edges (dictionaries not kgxedge objects!) into the existing list - # throw_out_duplicates will throw out duplicates, otherwise merge their attributes def merge_edges(self, edges): edge_count = 0 for edge in edges: @@ -208,7 +207,7 @@ def merge_edges(self, edges): def get_merged_nodes_jsonl(self): for node in self.nodes.values(): - yield f'{node}\n' + yield f'{quick_json_dumps(node)}\n' def get_merged_edges_jsonl(self): for edge in self.edges.values():