#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author : xuyong
# @email : xuyong@smail.swufe.edu.cn
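"""
Train a Deep Q-Learning agent that sets a daily toll on one link of a
day-to-day traffic-assignment network, then plot the resulting link
flows and the total cost per day under the learned tolling policy.
"""
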
import time

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

from Environment.day_to_day_trans_flow import Environment
from RLBrain_DeepRL.DeepQLearning import DeepQLearning


def plot_link_flow(link, actions):
"""
plot the daily link flow and total cost per day w.r.t actions
:param link: which link to charge
:param actions: how much fees to charge
:return:
"""
steps = len(actions)
    tc_per_day = np.zeros(steps, dtype='float32')  # total cost (reward) on each day
    path_no_toll = np.zeros((steps, 12), dtype='float32')  # daily flow on each of the 12 links
env = Environment()
s = env.reset()
for i, act in enumerate(actions):
path_no_toll[i, :] = s
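        # build a 12-dim toll vector: charge `act` on the chosen link, zero elsewhere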
one_hot = np.zeros(12, dtype='float32')
one_hot[link - 1] = 1
action = act * one_hot
r, s_, done = env.step(action=action)
s = s_
tc_per_day[i] = r
    # plot each link's flow over days, normalized by its day-0 flow
plt.plot(path_no_toll[:, 0] / path_no_toll[0, 0], color='green', label='link1')
plt.plot(path_no_toll[:, 1] / path_no_toll[0, 1], color='red', label='link2')
plt.plot(path_no_toll[:, 2] / path_no_toll[0, 2], color='blue', label='link3')
# plt.plot(path_no_toll[:, 3]/path_no_toll[0, 3], color='orange', label='link4')
plt.plot(path_no_toll[:, 9] / path_no_toll[0, 9], color='yellow', label='link10')
plt.plot(path_no_toll[:, 11] / path_no_toll[0, 11], color='skyblue', label='link12')
plt.legend()
plt.xlim(0, steps)
plt.ylim(0.2, 2)
plt.xlabel('day')
plt.ylabel('normalized link flow')
plt.show()
    # plot the daily total cost under this toll sequence
plt.plot(tc_per_day, color='green', label='total cost')
plt.legend()
plt.xlim(0, steps)
plt.xlabel('day')
plt.ylabel('total cost / day')
    plt.show()


def main():
venv = Environment()
# all possible actions (possible fees to charge)
actions = (1. * np.arange(0, 6, dtype='float32')).tolist()
    # number of possible actions
n_actions = len(actions)
# rl brain
config = tf.ConfigProto()
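    # cap TensorFlow at 40% of GPU memory so the process does not grab the whole device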
config.gpu_options.per_process_gpu_memory_fraction = 0.4
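    # the RL state stacks three 12-vectors (previous costs/flows, new flows, their
    # difference; see the hstack calls below), hence n_features = 12 * 3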
rl_brain = DeepQLearning(n_features=12*3, n_actions=n_actions, actions=actions,
learning_rate=0.01, e_greedy=0.15,
reward_decay=0.9, output_graph=True, double=True, EnQ=True, config=config)
    # specify the link on which to place a toll booth
charged_link = 1
# number of learning epochs
epochs = 2000
    # store the total reward every 5 epochs
store_total_reward = []
stored_epochs = []
# store the learned policy at the last epoch
policy = []
start_t = time.time()
    sd = StandardScaler()
s = venv.reset()
s_rl = np.hstack((venv.C_a_changed, s, venv.C_a_changed - s))
s_total = np.zeros(shape=(300, s_rl.shape[0]), dtype='float32')
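    # roll out 300 days under random tolls to collect states for fitting the scaler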
for t in range(300):
s_total[t, :] = s_rl
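        # random toll in {0, ..., 5}; since actions = [0., 1., ..., 5.], the index equals the fee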
a = np.random.randint(0, 6)
# action transform
one_hot = np.zeros(12, dtype='float32')
one_hot[charged_link - 1] = 1
action = a * one_hot
r, s_, done = venv.step(action)
s_rl_ = np.hstack((s, s_, s_ - s))
s = s_
s_rl = s_rl_
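    # fit the scaler once on the warm-up states; every RL state below is standardized with it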
sd.fit(s_total)
for epoch in range(epochs):
s = venv.reset()
s_rl = np.hstack((venv.C_a_changed, s, venv.C_a_changed - s))
s_rl = sd.transform(s_rl.reshape(1, -1))[0]
# print(s_rl)
total_r = 0
        delta_flows = []  # day-to-day flow changes (collected for inspection; not used below)
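        # one episode = 30 simulated days of setting a toll and observing the flows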
for t in range(30):
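            # epsilon-greedy choice from the Q-network (e_greedy=0.15 above); assuming
            # choose_action returns an index into `actions`, which here equals the fee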
a = rl_brain.choose_action(s_rl)
# action transform
one_hot = np.zeros(12, dtype='float32')
one_hot[charged_link - 1] = 1
action = a * one_hot
r, s_, done = venv.step(action)
delta_flow = np.sum(np.abs(s_ - s))
delta_flows.append(delta_flow)
s_rl_ = np.hstack((s, s_, s_ - s))
s_rl_ = sd.transform(s_rl_.reshape(1, -1))[0]
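            # store the transition (s, a, s', r, done) in the agent's replay memory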
rl_brain.store_transactions(s_rl, a, s_rl_, r, done)
s = s_
s_rl = s_rl_
total_r += r
if epoch == epochs - 1:
policy.append(a)
if done:
break
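        # one learning step per episode: presumably replays stored transitions to
        # update the Q-network (Double DQN was enabled above)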
rl_brain.learn()
if epoch % 5 == 0:
stored_epochs.append(epoch)
store_total_reward.append(total_r)
end_time = time.time()
            print('epoch: %d; '
                  'time for the last 5 epochs: %.2f s; '
                  'total reward: %.2f; learning iter: %d'
                  % (epoch, end_time - start_t, total_r, rl_brain.learn_iter))
start_t = end_time
    # plot the total cost per episode over the training epochs
plt.plot(store_total_reward, color='green', label='total cost')
plt.legend()
# plt.xticks(np.arange(len(stored_epochs)).tolist(), stored_epochs)
plt.xlabel('epochs')
plt.ylabel('total cost / epoch')
plt.show()
    # plot the toll charged on each day of the final episode
plt.plot(policy, color='green', label='fees')
plt.legend()
# plt.xticks(np.arange(len(stored_epochs)).tolist(), stored_epochs)
plt.xlabel('days')
plt.ylabel('fees / day')
plt.show()
    plot_link_flow(charged_link, policy)


if __name__ == '__main__':
main()