Skip to content

Commit

Permalink
removing json conversion memory saving technique from nodes in merging, it's slow and probably not necessary for nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
EvanDietzMorris committed Apr 24, 2023
1 parent 54a96e3 commit 4a152ee
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions Common/merging.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from Common.utils import quick_json_loads, quick_json_dumps, chunk_iterator

NODE_PROPERTIES_THAT_SHOULD_BE_SETS = {SYNONYMS, NODE_TYPES}
EDGE_PROPERTIES_THAT_SHOULD_BE_SETS = {AGGREGATOR_KNOWLEDGE_SOURCES, PUBLICATIONS}
EDGE_PROPERTIES_THAT_SHOULD_BE_SETS = {AGGREGATOR_KNOWLEDGE_SOURCES, PUBLICATIONS, XREFS}


def edge_key_function(edge):
Expand Down Expand Up @@ -180,17 +180,16 @@ def merge_nodes(self, nodes):
node_key = node['id']
if node_key in self.nodes:
self.merged_node_counter += 1
previous_node = quick_json_loads(self.nodes[node_key])
previous_node = self.nodes[node_key]
merged_node = entity_merging_function(previous_node,
node,
NODE_PROPERTIES_THAT_SHOULD_BE_SETS)
self.nodes[node_key] = quick_json_dumps(merged_node)
self.nodes[node_key] = merged_node
else:
self.nodes[node_key] = quick_json_dumps(node)
self.nodes[node_key] = node
return node_count

# merge a list of edges (dictionaries not kgxedge objects!) into the existing list
# throw_out_duplicates will throw out duplicates, otherwise merge their attributes
def merge_edges(self, edges):
edge_count = 0
for edge in edges:
Expand All @@ -208,7 +207,7 @@ def merge_edges(self, edges):

def get_merged_nodes_jsonl(self):
for node in self.nodes.values():
yield f'{node}\n'
yield f'{quick_json_dumps(node)}\n'

def get_merged_edges_jsonl(self):
for edge in self.edges.values():
Expand Down

0 comments on commit 4a152ee

Please sign in to comment.