@@ -89,7 +89,6 @@ def __init__(self, X, Y, config_space=None,
89
89
else :
90
90
pcs [i ] = (hp .lower , hp .upper )
91
91
92
- print (pcs )
93
92
# set forest options
94
93
forest = reg .fanova_forest ()
95
94
forest .options .num_trees = n_trees
@@ -108,7 +107,7 @@ def __init__(self, X, Y, config_space=None,
108
107
rng = reg .default_random_engine ()
109
108
else :
110
109
rng = reg .default_random_engine (seed )
111
- data = reg .data_container (X .shape [1 ])
110
+ data = reg .default_data_container (X .shape [1 ])
112
111
113
112
for i , (mn ,mx ) in enumerate (pcs ):
114
113
if (np .isnan (mx )):
@@ -142,8 +141,16 @@ def __init__(self, X, Y, config_space=None,
142
141
midpoints = []
143
142
for i , split_vals in enumerate (tree_split_values ):
144
143
if np .isnan (pcs [i ][1 ]): # categorical parameter
145
- midpoints .append (split_vals )
146
- sizes .append ( np .ones (len (split_vals )))
144
+ # check if the tree actually splits on this parameter
145
+ if len (split_vals ) > 0 :
146
+ midpoints .append (split_vals )
147
+ sizes .append ( np .ones (len (split_vals )))
148
+ # if not, simply append 0 as the value with the number
149
+ # of categories as the size, that way this parameter will
150
+ # get 0 importance from this tree.
151
+ else :
152
+ midpoints .append ((0 ,))
153
+ sizes .append ((pcs [i ][0 ],))
147
154
else :
148
155
# add bounds to split values
149
156
sv = np .array ([pcs [i ][0 ]] + list (split_vals ) + [pcs [i ][1 ]])
@@ -240,7 +247,7 @@ def __compute_marginals(self, dimensions):
240
247
for i , (m , s ) in enumerate (zip (prod_midpoints , prod_sizes )):
241
248
sample [list (dimensions )] = list (m )
242
249
ls = self .the_forest .marginal_prediction_stat_of_tree (tree_idx , sample .tolist ())
243
- print (sample , ls .mean ())
250
+ # print(sample, ls.mean())
244
251
if not np .isnan (ls .mean ()):
245
252
stat .push ( ls .mean (), np .prod (np .array (s )) * ls .sum_of_weights ())
246
253
@@ -272,12 +279,12 @@ def quantify_importance(self, dimensions):
272
279
for k in range (1 , len (dimensions )+ 1 ):
273
280
for sub_dims in it .combinations (dimensions , k ):
274
281
importance_dict [sub_dims ] = {}
275
- fractions_total = [self .V_U_total [sub_dims ][t ]/ self .trees_total_variance [t ] for t in range (self .n_trees )]
276
- fractions_individual = [self .V_U_individual [sub_dims ][t ]/ self .trees_total_variance [t ] for t in range (self .n_trees )]
277
- # TODO: clean NANs here and catch zero variance in a tree!
278
-
279
- importance_dict [sub_dims ]['individual importance' ] = np .mean (fractions_individual )
280
- importance_dict [sub_dims ]['total importance' ] = np .mean (fractions_total )
282
+ fractions_total = np . array ( [self .V_U_total [sub_dims ][t ]/ self .trees_total_variance [t ] for t in range (self .n_trees )])
283
+ fractions_individual = np . array ( [self .V_U_individual [sub_dims ][t ]/ self .trees_total_variance [t ] for t in range (self .n_trees )])
284
+ # drop NaN fractions, which arise when a tree has zero total variance
285
+ indices = np . logical_and ( ~ np . isnan ( fractions_individual ), ~ np . isnan ( fractions_total ))
286
+ importance_dict [sub_dims ]['individual importance' ] = np .mean (fractions_individual [ indices ] )
287
+ importance_dict [sub_dims ]['total importance' ] = np .mean (fractions_total [ indices ] )
281
288
282
289
return (importance_dict )
283
290
0 commit comments