-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot.py
146 lines (124 loc) · 4.28 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import ipdb
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
##PARAMETERS
EXP_NAME = 'test'
MAX_ROUNDS = 699
MAX_YVAL = 14
# DEFINE OUTPUT DIR
RESULTS_DIR = os.path.join( os.getcwd())
GRAPHS_DIR = os.path.join( os.getcwd(), 'graphs')
##SET SEABORN STYLE
sns.set_style("whitegrid")
sns.set_context("paper")
##SET MATPLOTLIB STYLE
TICKS_FONTSIZE = 16
LABEL_FONTSIZE=18
LEGEND_FONTSIZE=15
mpl.rcParams['xtick.labelsize'] = TICKS_FONTSIZE
mpl.rcParams['ytick.labelsize'] = TICKS_FONTSIZE
mpl.rcParams['legend.fontsize'] = TICKS_FONTSIZE
mpl.rcParams['axes.labelsize'] = LABEL_FONTSIZE
mpl.rcParams['axes.titlesize'] = LABEL_FONTSIZE
mpl.rcParams['font.size'] = LABEL_FONTSIZE
plt.rc('legend',**{'fontsize':LEGEND_FONTSIZE})
######################### BASIC FUNCTIONS ###########################
#######################################################################
def read_results():
"""Reads the results for all the nodes and returns
a list of pandas Dataframes, one for each node"""
#Get list of nodes(files) with results
files = [f for f in os.listdir(RESULTS_DIR) if os.path.isfile(os.path.join(RESULTS_DIR, f))]
#for x in ['1','10','']
#files = filter_parallel(files,1,'1000');
#print(files)
#Load results from each node
nodes_results = {}
for node in files:
f = os.path.join(RESULTS_DIR,node)
try:
#Read csv
df = pd.read_csv(f,usecols=['start','mindTime','completionTime'])
df = df.replace('None',np.nan)
#Create column with seconds
seconds = node.split('_')[2].split('.')[0]
values = np.empty(shape=len(df),dtype=int)
values.fill(int(seconds))
seconds_df = pd.DataFrame({'second':values})
df = df.join(seconds_df)
#Create column with parallel transactions
parallel = node.split('_')[1]
values = np.empty(shape=len(df),dtype=int)
values.fill(int(parallel))
parallel_df = pd.DataFrame({'parallelTrans':values})
df = df.join(parallel_df)
#Write df
nodes_results[node] = df
#ipdb.set_trace()
except Exception, e:
print e
ipdb.set_trace()
final = pd.concat(nodes_results.values(),ignore_index=True)
#ipdb.set_trace()
return final
def plot_averages(df):
result = df.groupby(by='parallelTrans').aggregate(np.median)
result = result[['mindTime','completionTime']]/1000
result1 = df.groupby(by='parallelTrans').aggregate(np.std)
result1 = result1[['mindTime','completionTime']]/1000
#ipdb.set_trace()
#fig = plt.figure()
#result = result[['mindTime','completionTime']].set_index('')
#Print the plot
#result.plot(marker='o', xticks=result.index, logx=True, yerr=result1)
result.plot.bar(yerr=result1)
#Modify parameters
plt.xlabel('# Parallel Transactions')
plt.ylabel('Seconds')
plt.show()
#plt.xlim(0,MAX_ROUNDS)
#plt.ylim(0,MAX_YVAL)
#Dummy line to avoid bug of matplotlib that closes image right after plot
#It's a readline function, just press enter
raw_input('Done')
#You can also automatically save figures
#fig.savefig(os.path.join(GRAPHS_DIR,var.replace(' ','_')+'_per_round.png'), format='png', dpi=fig.dpi)
############################### HELPERS ##############################
#######################################################################
def getMedianDF(nodes_results,var):
""""Helper function that extracts median time series of the var
accross the various nodes"""
series = []
for name,r in nodes_results.iteritems():
#Normalize duplicate rounds
r['round'] = r['round'].apply(np.trunc)
s = r.groupby('round',axis=0).mean()[var]
s.name = name
series.append(s)
try:
df = pd.concat(series,axis=1)
except:
ipdb.set_trace()
median = df.median(axis=1)
median.name = var
return median
def getECDF(df):
"""Helper function that caclulates the ECDF of a
dataframe"""
df = df.sort_values().value_counts()
ecdf = df.sort_index().cumsum()*1./df.sum()
return ecdf
def filter_parallel(files,index,value):
"""Filter which files will be loaded"""
return filter(lambda x: x.split('_')[index] == value,files)
########################## MAIN #######################################
#######################################################################
if __name__ == '__main__':
df = read_results()
plot_averages(df)
#plot_var_ecdf_per_round(nodes_results,'HTTP Error')
#plot_comparative(nodes_results,['HTTP Error', 'UDP Error'])