Skip to content

Commit fc02017

Browse files
committed
v1.0.0: Added scipy requirement, tested on previous Python versions down to 3.10, added the possibility to customize classes and pass them inside the InfluenceMaximization object
1 parent bc78862 commit fc02017

File tree

140 files changed

+3520
-24
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+3520
-24
lines changed

.idea/misc.xml

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/netmax.iml

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ NetMax was developed with Python 3.12 and requires the installation of the follo
3232

3333
- **networkx** (version 3.3)
3434
- **numpy**
35+
- **scipy**
3536
- **tqdm**
3637
- **heapdict**
3738

@@ -55,9 +56,9 @@ This framework wants to be a useful tool for all those people who study the prob
5556
- `r`: number of simulations to execute (default is 100)
5657
- `verbose`: if `True` sets the logging level to `INFO`, otherwise displays only the minimal information
5758

58-
**Important**: `alg`, `diff_model`, `inf_prob` and `endorsement_policy` are `str` parameters, in order to prevent the user from directly importing and instantiating all the specific classes, which could have not been user-friendly.
59-
If the user, after reading the documentation, wants to customize some specific parameters, he can still change the corresponding attribute after the instantiation of the `InfluenceMaximization` object.
60-
To view all the keywords for these parameters, see the corresponding section.
59+
**Important**: `alg`, `diff_model`, `inf_prob` and `endorsement_policy` can be either `str` or class parameters:
60+
- If they are `str` parameters, they represent the `name` attribute of the corresponding class already present in the framework. This was done to spare the user from directly importing and instantiating all the specific classes, which would not have been user-friendly. To view all the keywords for these parameters, see the corresponding section.
61+
- Otherwise, they must extend the corresponding superclass depending on the parameter (`Algorithm` for `alg`, `DiffusionModel` for `diff_model`, `InfluenceProbability` for `inf_prob`, `EndorsementPolicy` for `endorsement_policy`). This way, users can define their own custom classes.
6162

6263
After creating the `InfluenceMaximization` object, the user may call its `run()` method, which returns:
6364

__pycache__/utils.cpython-310.pyc

6.55 KB
Binary file not shown.

__pycache__/utils.cpython-39.pyc

6.59 KB
Binary file not shown.

build/lib/netmax/__init__.py

Whitespace-only changes.

build/lib/netmax/agent.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import copy
2+
3+
class Agent(object):
    """
    This class models an agent: a named player with a budget, a seed set, a spread value and an id.
    """

    def __init__(self, name: str, budget: int, id: int = -1):
        """
        :param name: The name of the agent.
        :param budget: The budget of the agent.
        :param id: The id of the agent (-1 when not specified).
        """
        self.name: str = name
        self.budget: int = budget
        self.seed: list[int] = []  # Seed set of the agent, empty at creation
        self.spread = 0
        self.id: int = id

    def __deepcopy__(self, memodict=None):
        """
        Makes a deep copy of the agent object.
        :param memodict: the memo dictionary handed over by copy.deepcopy (None when called directly). It is
        forwarded to the copy of the seed set, so that objects already copied during the same deepcopy call
        are reused instead of being duplicated.
        :return: a new Agent with the same name, budget, id and spread, and an independent copy of the seed set.
        """
        # The default is None (and not a dict literal) to avoid sharing one mutable default between calls
        new_agent = Agent(self.name, self.budget, self.id)
        new_agent.seed = copy.deepcopy(self.seed, memodict)
        new_agent.spread = self.spread
        return new_agent
+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from .proxy_based.degdis import DegDis
2+
from .proxy_based.highest_out_degree import HighestOutDegree
3+
from .proxy_based.group_pr import Group_PR
4+
from .simulation_based.mcgreedy import MCGreedy
5+
from .simulation_based.celf import CELF
6+
from .simulation_based.celfpp import CELF_PP
7+
from .sketch_based.static_greedy import StaticGreedy
8+
from .sketch_based.ris import RIS
9+
from .sketch_based.tim import TIM
10+
from .sketch_based.timp import TIMp
+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import networkx as nx
2+
from netmax.agent import Agent
3+
4+
class Algorithm:
    """
    Base class of the algorithms: it stores the shared state and leaves run() to the subclasses.
    """

    def __init__(self, graph: nx.DiGraph, agents: list[Agent], curr_agent_id: int, budget, diff_model, r):
        """
        :param graph: networkx DiGraph
        :param agents: list of Agent
        :param curr_agent_id: int - index of the current agent
        :param budget: budget of the current agent
        :param diff_model: the diffusion model, stored as received
        :param r: stored as received; presumably the number of simulations, as the README describes `r`
        (TODO confirm against the caller)
        """
        self.graph, self.agents = graph, agents
        self.curr_agent_id, self.budget = curr_agent_id, budget
        self.diff_model, self.r = diff_model, r

    def set_curr_agent(self, curr_agent_id):
        """
        Replaces the index of the current agent with the given one.
        :param curr_agent_id: index of the agent that becomes the current one.
        """
        self.curr_agent_id = curr_agent_id

    def __in_some_seed_set__(self, v, agents):
        """
        Tells whether a node already belongs to the seed set of at least one agent.
        :param v: the node to look for.
        :param agents: iterable of agents, each one exposing its seed set through the 'seed' attribute.
        :return: True if v is in the seed set of some agent, False otherwise.
        """
        return any(v in agent.seed for agent in agents)

    def run(self):
        """
        Entry point of the algorithm, to be provided by every subclass.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("This method must be implemented by subclasses")

build/lib/netmax/algorithms/proxy_based/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import copy
2+
from netmax.algorithms.proxy_based.proxy_based import ProxyBasedAlgorithm
3+
from heapdict import heapdict
4+
5+
class DegDis(ProxyBasedAlgorithm):
    """
    Paper: Chen et al. - "Efficient Influence Maximization in Social Networks".
    The Degree Discount heuristic is an improvement over the Highest Out-Degree algorithm. It takes into account the
    influence of already selected nodes and adjusts the degree of remaining nodes accordingly.
    """

    name = 'degdis'

    def __init__(self, graph, agents, curr_agent_id, budget, diff_model, r):
        super().__init__(graph, agents, curr_agent_id, budget, diff_model, r)
        # The data structures below are built lazily, at the first call of run()
        self.d = None   # <vertex: out-degree>
        self.t = None   # <agent id: <vertex: number of its selected in-neighbors>>
        self.p = None   # <agent id: <vertex: influence probability>>
        self.dd = None  # <agent id: heapdict of the negated degree-discount scores>

    def __initialize_degree_discount__(self):
        """
        Initializes all the data structures needed for the algorithm. Most of them are agent-dependent, so every
        agent has its own version of the data structure.
        """
        # Influence probabilities for every agent, dictionary of dictionaries <agent: <vertex: influence_probability>>.
        # They are filled inside run() with the highest 'p' label among the edges which reach the vertex from the
        # nodes selected by the agent, while the author of the paper sets a fixed value (for example 0.01)
        self.p = {a.id: {} for a in self.agents}
        # Number of adjacent vertices that are in the seed set,
        # dictionary of dictionaries <agent: <vertex: adjacent_vertices_in_ss>>
        self.t = {a.id: {} for a in self.agents}
        # Degree discount heuristic, dictionary <agent: heapdict>
        self.dd = {a.id: heapdict() for a in self.agents}
        # Degree of each vertex, dictionary <vertex: degree>
        self.d = {}
        for u in self.graph.nodes():
            self.d[u] = self.graph.out_degree(u)
            # Initialize the heuristic value as the current degree (negative because of the min-heap),
            # and the number of adjacent vertices that are in the seed set (at this moment 0 of course)
            for a in self.agents:
                self.dd[a.id][u] = -self.d[u]
                self.t[a.id][u] = 0

    def __delete_from_dd__(self, v):
        """
        Removes the node v from the degree discount heap of every agent.
        :param v: The node to remove.
        """
        for a in self.agents:
            del self.dd[a.id][v]

    def __compute_node_score__(self, v):
        """
        :param v: the node to score; its influence probability for the current agent must already be set.
        :return: the score of the degree discount heuristic for the node v, as shown in the paper
        (d - 2t - (d - t) * t * p). Only difference is that the paper works with fixed-value influence
        probabilities, while here p is the per-node value stored for the current agent.
        """
        d_v = self.d[v]
        t_v = self.t[self.curr_agent_id][v]
        p_v = self.p[self.curr_agent_id][v]
        return d_v - 2 * t_v - (d_v - t_v) * t_v * p_v

    def run(self):
        """
        Selects 'budget' nodes for the current agent, one at a time, always taking the node with the highest
        degree-discount score and then discounting the score of its neighbors.
        :return: The nodes to add in the seed set of the current agent (only the ones selected by this call)
        and the spreads for each agent.
        """
        # Defined in the super-class ProxyBasedAlgorithm (not shown here). According to the original note, it is
        # necessary since when the input network is signed, the graph of the proxy-based algorithm
        # contains only the trust-edges
        self.__update_active_nodes__()
        # Initialize degrees and degree discounts if it's the first turn of the first round
        if self.dd is None:
            self.__initialize_degree_discount__()
        # Work on a copy of the agents, so that the real seed sets are not modified by this method
        agents_copy = copy.deepcopy(self.agents)
        curr = self.curr_agent_id
        selected = []  # Nodes chosen by this call, in order of selection
        # Repeats until the budget is fulfilled (in the InfluenceMaximization class, inside the run method, the algorithm
        # is always invoked with the budget parameter set to 1, but we preferred to write the code in a more general way)
        for _ in range(self.budget):
            # Select the node with the maximum value of the degree discount heuristic
            u = self.dd[curr].peekitem()[0]
            agents_copy[curr].seed.append(u)  # Add it into the seed set of the current agent
            selected.append(u)
            self.__delete_from_dd__(u)  # Delete u from the degree discount of all agents
            for v in self.graph[u]:  # Neighbors of node u
                if self.__in_some_seed_set__(v, agents_copy):
                    continue  # Nodes which are already seeds don't need a score
                # Keep as influence probability of node v the maximum label among the edges
                # which reach it from the selected nodes (different from the paper)
                p_uv = self.graph.edges[u, v]['p']
                self.p[curr][v] = max(self.p[curr].get(v, p_uv), p_uv)
                self.t[curr][v] += 1  # Increase the number of selected neighbors
                # Compute the degree-discount heuristic of node v (negated because of the min-heap)
                self.dd[curr][v] = -self.__compute_node_score__(v)
        # Return the new nodes to add to the seed set and the spread (which is 0 because we didn't do any simulation,
        # in fact this is only a fictional value, since the real spread will be computed at the end of the game).
        # The selected nodes are tracked explicitly: slicing the seed set with [:-budget] would return the
        # seeds that were already there instead of the new ones.
        return selected, {a.name: 0 for a in self.agents}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import copy
2+
import networkx as nx
3+
from heapdict import heapdict
4+
from netmax.algorithms.proxy_based.proxy_based import ProxyBasedAlgorithm
5+
6+
class Group_PR(ProxyBasedAlgorithm):
    """
    Paper: Liu et al. - "Influence Maximization over Large-Scale Social Networks A Bounded Linear Approach".
    Group-PageRank starts from the fact that PageRank is an upper bound to the influence of single nodes under
    linear influence processes (and it's called influence-PageRank), and extends this concept to compute the
    influence of groups of nodes via the so-called Group-PageRank. Then it plugs this heuristic into a linear
    framework to maximize the influence spread.
    """

    name = 'group_pr'

    def __init__(self, graph, agents, curr_agent_id, budget, diff_model, r):
        super().__init__(graph, agents, curr_agent_id, budget, diff_model, r)
        self.d = 0.85  # Parameter of PageRank (passed as 'alpha' to networkx)
        # PageRank works with incoming links, but the influence propagation has only sense considering outgoing links,
        # so we use the inverted graph to compute PageRank
        self.inverted_graph = self.graph.reverse(copy=True)
        # Nodes that can still be influenced: a single list, from which every selected node is removed
        self.influencee = list(self.graph.nodes)
        # Dictionary of heaps which store the value of delta for each node and for each agent,
        # built lazily at the first call of run()
        self.delta_dict = None

    def __initialize_delta_dict__(self):
        """
        Initialize the dictionary of heaps with the same heap for each agent as in the beginning the delta value
        is the same.
        """
        # Compute influence-PageRank with a bias towards the nodes that can be influenced
        personalization = {u: 1 / len(self.influencee) for u in self.influencee}
        fPR = nx.pagerank(self.inverted_graph, alpha=self.d, personalization=personalization, weight='p')
        scale = len(self.influencee) / (1 - self.d)
        curr_delta_dict = heapdict()
        for s in self.graph.nodes():
            # Formula in the paper, negated because the heap gives back the lowest value first,
            # while we want the node with the highest delta
            curr_delta_dict[s] = -(scale * fPR[s])
        # Every agent gets its own independent copy of the heap
        self.delta_dict = {a.id: copy.deepcopy(curr_delta_dict) for a in self.agents}

    def __remove_node_from_heaps__(self, v):
        """
        Removes a node from all heaps.
        :param v: the node to remove.
        """
        for a in self.agents:
            del self.delta_dict[a.id][v]

    def __get_delta_bound__(self, seed_set, s):
        """
        Method used to update the entries of the delta dictionary. In the paper there are two ways to do so: a linear
        approach or a bound approach. We chose to implement the bound approach with this method.
        :param seed_set: the seed set.
        :param s: the node which delta value has to be computed.
        :return: the value of delta for the node s.
        """
        # NOTE(review): PageRank is recomputed at every call, even when the set of nodes
        # that can be influenced has not changed since the previous one
        if not self.influencee:
            # If no node can be influenced, compute the influence-PageRank with the default personalization vector
            fPR = nx.pagerank(self.inverted_graph, alpha=self.d, weight='p')
        else:
            # Otherwise compute the influence-PageRank with a bias towards the nodes that can be influenced
            personalization = {u: 1 / len(self.influencee) for u in self.influencee}
            fPR = nx.pagerank(self.inverted_graph, alpha=self.d, personalization=personalization, weight='p')
        # Initialize the value of delta with the influence-PageRank of the node
        delta_s = fPR[s]
        # For each node j in the seed set, subtract two contributions from the current value of delta:
        # 1) The influence-PageRank of node s multiplied by the weight of the edge (j,s), if exists
        # 2) The influence-PageRank of node j multiplied by the weight of the edge (s,j), if exists
        for j in seed_set:
            p_js = self.graph.edges[j, s]['p'] if self.graph.has_edge(j, s) else 0
            p_sj = self.graph.edges[s, j]['p'] if self.graph.has_edge(s, j) else 0
            delta_s = delta_s - self.d * p_js * fPR[s] - self.d * p_sj * fPR[j]
        # Formula inside the paper
        return delta_s * (len(self.influencee) / (1 - self.d))

    def run(self):
        """
        Selects 'budget' nodes for the current agent. At each step the node on top of the heap gets its delta
        refreshed with the bound method, and it is selected only if it's still on top after the update.
        :return: The nodes to add in the seed set of the current agent (only the ones selected by this call)
        and the spreads for each agent.
        """
        # Defined in the super-class ProxyBasedAlgorithm (not shown here). According to the original note, it is
        # necessary since when the input network is signed, the graph of the proxy-based algorithm
        # contains only the trust-edges
        self.__update_active_nodes__()
        # Initialize the delta dictionary if it's the first turn of the first round
        if self.delta_dict is None:
            self.__initialize_delta_dict__()
        # Work on a copy of the agents, so that the real seed sets are not modified by this method
        agents_copy = copy.deepcopy(self.agents)
        curr_seed = agents_copy[self.curr_agent_id].seed
        heap = self.delta_dict[self.curr_agent_id]
        selected = []  # Nodes chosen by this call, in order of selection
        # Repeats until the budget is fulfilled (in the InfluenceMaximization class, inside the run method, the algorithm
        # is always invoked with the budget parameter set to 1, but we preferred to write the code in a more general way)
        while len(selected) < self.budget:
            # Take the node which has the maximum value of delta (the deltas are stored negated,
            # so it's the first item given back by the heap)
            s, _ = heap.popitem()
            # Update this node's delta value with bound method and reinsert the node into the heap
            heap[s] = -self.__get_delta_bound__(curr_seed, s)
            # If another node took the first place, try again with that one
            if heap.peekitem()[0] != s:
                continue
            curr_seed.append(s)  # Add it into the seed set of the current agent
            selected.append(s)
            self.__remove_node_from_heaps__(s)  # And remove it from all the heaps
            self.influencee.remove(s)  # Remove it also from the set of nodes that can be influenced
        # Return the new nodes to add to the seed set and the spread (which is 0 because we didn't do any simulation,
        # in fact this is only a fictional value, since the real spread will be computed at the end of the game).
        # The selected nodes are tracked explicitly: slicing the seed set with [:-budget] would return the
        # seeds that were already there instead of the new ones.
        return selected, {a.name: 0 for a in self.agents}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from networkx import DiGraph
2+
from netmax.algorithms.proxy_based.proxy_based import ProxyBasedAlgorithm
3+
from netmax import influence_maximization as im
4+
5+
6+
class HighestOutDegree(ProxyBasedAlgorithm):
    """
    The Highest Out-Degree algorithm ranks the nodes by out-degree, i.e. by the number of edges
    which leave the node, and picks the top ones.
    The rationale is that a node with many outgoing edges can influence many other nodes of the network.
    """

    name = 'outdeg'

    def __init__(self, graph: DiGraph, agents, curr_agent_id, budget, diff_model, r):
        super().__init__(graph, agents, curr_agent_id, budget, diff_model, r)
        # Nodes in ascending order of out-degree, built at the first call of run()
        self.out_deg_ranking = None

    def run(self):
        """
        Picks 'budget' nodes from the end of the out-degree ranking, removing them from it.
        :return: The nodes to add in the seed set of the current agent and the spreads for each agent.
        """
        # Defined in the super-class ProxyBasedAlgorithm (not shown here). According to the original note, it is
        # necessary since when the input network is signed, the graph of the proxy-based algorithm
        # contains only the trust-edges
        self.__update_active_nodes__()
        # The ranking is computed only once, then it's consumed call after call
        if self.out_deg_ranking is None:
            candidates = list(im.inactive_nodes(self.graph))
            candidates.sort(key=lambda node: self.graph.out_degree(node))
            self.out_deg_ranking = candidates
        # The ranking is in ascending order, so the node with the highest out-degree is always the last one.
        # One node is taken for each unit of budget (the InfluenceMaximization class always invokes the
        # algorithm with the budget set to 1, but the code is kept general)
        picked = [self.out_deg_ranking.pop() for _ in range(self.budget)]
        # The spread is 0 for every agent because no simulation is done here: it's only a
        # fictional value, since the real spread will be computed at the end of the game
        zero_spreads = {agent.name: 0 for agent in self.agents}
        return picked, zero_spreads

0 commit comments

Comments
 (0)