-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandom-scores.py
53 lines (42 loc) · 1.74 KB
/
random-scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# imports and helper methods
import gym
import gym.spaces
import multiprocessing as mp
import psutil
import os
"""
Run random-action episodes for one environment; called once per environment for parallelization.
Args:
envName: id of the gym environment to create (passed to gym.make)
"""
def runAgent(envName, numEpisodes=10, numFrames=18000):
    """
    Play a gym environment with randomly sampled actions and report the
    average episode score.

    Only envName is required, so the function can be handed directly to
    multiprocessing.Pool.map with a list of environment names.

    Args:
        envName: id of the gym environment to create (passed to gym.make).
        numEpisodes: number of episodes to play and average over.
        numFrames: maximum number of steps taken in any one episode.

    Returns:
        The summed score of all episodes divided by numEpisodes. The same
        value is printed, prefixed with the string form of env.env.

    Raises:
        ValueError: if numEpisodes is less than 1 (no average can be formed).
    """
    if numEpisodes < 1:
        raise ValueError('numEpisodes must be at least 1')

    env = gym.make(envName)
    try:
        scoreTotal = 0  # score accumulates over all episodes
        for _ep in range(numEpisodes):  # episode loop
            env.reset()
            scoreEp = 0
            for _frame in range(numFrames):  # frame loop
                # step() is unpacked as four values here.
                # NOTE(review): newer gym/gymnasium releases return five
                # values from step() - confirm against the installed version.
                _state, reward, isDone, _debug = env.step(
                    env.action_space.sample())
                scoreEp += reward  # accumulate reward in score
                if isDone:
                    break  # episode finished before the frame cap
            scoreTotal += scoreEp

        scoreAvg = scoreTotal / numEpisodes
        print(str(env.env) + ': ' + str(scoreAvg))
        return scoreAvg
    finally:
        # release the environment even if reset()/step() raised
        env.close()
# https://stackoverflow.com/questions/42103367/limit-total-cpu-usage-in-python-multiprocessing/42130713
def limit_cpu():
    """
    Lower the scheduling priority of the calling process.

    Passed as the Pool initializer, so it runs once in every worker process.
    """
    p = psutil.Process(os.getpid())
    if psutil.WINDOWS:
        # On Windows, nice() takes a priority-class constant rather than a
        # Unix niceness integer.
        p.nice(psutil.BELOW_NORMAL_PRIORITY_CLASS)
    else:
        p.nice(10)
# all of the titles we will be general game playing on
# we chose games that we know TPG does OK in alone
envNames = ['Alien-v0','Asteroids-v0','Atlantis-v0','BankHeist-v0',
            'BattleZone-v0','Bowling-v0','Boxing-v0','Centipede-v0',
            'ChopperCommand-v0','DoubleDunk-v0','FishingDerby-v0',
            'Freeway-v0','Frostbite-v0','Gravitar-v0','Hero-v0',
            'IceHockey-v0','Jamesbond-v0','Kangaroo-v0','Krull-v0',
            'KungFuMaster-v0','MsPacman-v0','PrivateEye-v0',
            'RoadRunner-v0','Skiing-v0','Tennis-v0','TimePilot-v0',
            'UpNDown-v0','Venture-v0','WizardOfWor-v0','Zaxxon-v0']

# number of worker processes in the pool
processes = 2

# Guard the pool creation: with the "spawn" start method every worker
# re-imports this module, and without the guard each import would try to
# start another pool.
if __name__ == '__main__':
    # the context manager tears the pool down once map() has returned
    with mp.Pool(processes=processes, initializer=limit_cpu) as pool:
        pool.map(runAgent, envNames)